initial commit
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 9 Feb 2011 01:09:48 +0000 (01:09 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 9 Feb 2011 01:09:48 +0000 (01:09 +0000)
211 files changed:
forester/java/src/org/forester/go/BasicGoRelationship.java [new file with mode: 0644]
forester/java/src/org/forester/go/BasicGoSubset.java [new file with mode: 0644]
forester/java/src/org/forester/go/BasicGoTerm.java [new file with mode: 0644]
forester/java/src/org/forester/go/BasicGoXRef.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoId.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoNameSpace.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoRelationship.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoSubset.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoTerm.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoUtils.java [new file with mode: 0644]
forester/java/src/org/forester/go/GoXRef.java [new file with mode: 0644]
forester/java/src/org/forester/go/Mapping.java [new file with mode: 0644]
forester/java/src/org/forester/go/OBOparser.java [new file with mode: 0644]
forester/java/src/org/forester/go/PfamToGoMapping.java [new file with mode: 0644]
forester/java/src/org/forester/go/PfamToGoParser.java [new file with mode: 0644]
forester/java/src/org/forester/go/TestGo.java [new file with mode: 0644]
forester/java/src/org/forester/go/etc/MetaOntologizer.java [new file with mode: 0644]
forester/java/src/org/forester/go/etc/OntologizerResult.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/FastaParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/GeneralMsaParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/PhylogenyParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/SymmetricalDistanceMatrixParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nexus/NexusConstants.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nexus/NexusFormatException.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nhx/NHXFormatException.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/nhx/NHXtags.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlDataFormatException.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlException.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlHandler.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/XmlElement.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/AnnotationParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/BinaryCharactersParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/BranchWidthParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/ColorParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/ConfidenceParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/DateParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/DistributionParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/DomainArchitectureParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/EventParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/IdentifierParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/PhylogenyDataPhyloXmlParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/PointParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/PolygonParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/PropertyParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/ProteinDomainParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/ReferenceParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceRelationParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/TaxonomyParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/phyloxml/data/UriParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/tol/TolParser.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/tol/TolXmlHandler.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/tol/TolXmlMapping.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/util/ParserUtils.java [new file with mode: 0644]
forester/java/src/org/forester/io/parsers/util/PhylogenyParserException.java [new file with mode: 0644]
forester/java/src/org/forester/io/writers/PhyloXmlNodeWriter.java [new file with mode: 0644]
forester/java/src/org/forester/io/writers/PhylogenyWriter.java [new file with mode: 0644]
forester/java/src/org/forester/io/writers/SequenceWriter.java [new file with mode: 0644]
forester/java/src/org/forester/msa/BasicMsa.java [new file with mode: 0644]
forester/java/src/org/forester/msa/Mafft.java [new file with mode: 0644]
forester/java/src/org/forester/msa/MafftOLD.java [new file with mode: 0644]
forester/java/src/org/forester/msa/Msa.java [new file with mode: 0644]
forester/java/src/org/forester/msa/MsaFormatException.java [new file with mode: 0644]
forester/java/src/org/forester/msa/MsaInferrer.java [new file with mode: 0644]
forester/java/src/org/forester/msa/MsaTools.java [new file with mode: 0644]
forester/java/src/org/forester/msa/ResampleableMsa.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/BasicExternalNodeBasedCoverageExtender.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/BranchCountingBasedScoringMethod.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/BranchLengthBasedScoringMethod.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/Coverage.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/CoverageCalculationMethod.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/CoverageCalculationOptions.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/CoverageCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/CoverageExtender.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/ExternalNodeBasedCoverage.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/ExternalNodeBasedCoverageMethod.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/ExternalNodeBasedCoverageMethodOptions.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/LogBranchLengthBasedScoringMethod.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/ModelingUtils.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/ScoringMethodForExternalNode.java [new file with mode: 0644]
forester/java/src/org/forester/pccx/TestPccx.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/Edge.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/Phylogeny.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/PhylogenyBranch.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/PhylogenyNode.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/PhylogenyNodeI.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Accession.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Annotation.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/BinaryCharacters.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/BranchColor.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/BranchData.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/BranchWidth.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Confidence.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Date.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Distribution.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/DomainArchitecture.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Event.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Identifier.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/MultipleUris.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/NodeData.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/PhylogenyData.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/PhylogenyDataUtil.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Point.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Polygon.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/PropertiesMap.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Property.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/ProteinDomain.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Reference.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Sequence.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/SequenceRelation.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Taxonomy.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/data/Uri.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/factories/BasicPhylogenyFactory.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/factories/ParserBasedPhylogenyFactory.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/factories/PhylogenyFactory.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/ChildNodeIteratorForward.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/ExternalForwardIterator.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/LevelOrderTreeIterator.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/PhylogenyNodeIterator.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/PostOrderStackObject.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/PostorderTreeIterator.java [new file with mode: 0644]
forester/java/src/org/forester/phylogeny/iterators/PreorderTreeIterator.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/DistanceCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/GSDI.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/ORcount.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/RIO.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/RIOn.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/SDI.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/SDIR.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/SDIse.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/Shin.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/TaxonomyAssigner.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/TestGSDI.java [new file with mode: 0644]
forester/java/src/org/forester/sdi/Tuplet.java [new file with mode: 0644]
forester/java/src/org/forester/sequence/BasicSequence.java [new file with mode: 0644]
forester/java/src/org/forester/sequence/Sequence.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/AdjactantDirectedCombinableDomains.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicCombinableDomains.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicDomain.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicProtein.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BasicSpecies.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/BinaryDomainCombination.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/CombinableDomains.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarity.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/CountsBasedPairwiseDomainSimilarity.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DirectedCombinableDomains.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/Domain.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainArchitectureBasedGenomeSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainCountsBasedPairwiseSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainId.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainLengths.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainLengthsTable.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainSimilarity.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/DomainSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/GenomeWideCombinableDomains.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/MappingResults.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/PairwiseDomainSimilarity.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/PairwiseDomainSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/Protein.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/ProteinCountsBasedPairwiseDomainSimilarityCalculator.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/ProteinId.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/SimpleDomain.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/Species.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/SurfacingConstants.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/SurfacingUtil.java [new file with mode: 0644]
forester/java/src/org/forester/surfacing/TestSurfacing.java [new file with mode: 0644]
forester/java/src/org/forester/test/Test.java [new file with mode: 0644]
forester/java/src/org/forester/tools/ConfidenceAssessor.java [new file with mode: 0644]
forester/java/src/org/forester/tools/PhylogenyDecorator.java [new file with mode: 0644]
forester/java/src/org/forester/tools/SupportCount.java [new file with mode: 0644]
forester/java/src/org/forester/tools/TreeSplitMatrix.java [new file with mode: 0644]
forester/java/src/org/forester/util/AsciiHistogram.java [new file with mode: 0644]
forester/java/src/org/forester/util/BasicDescriptiveStatistics.java [new file with mode: 0644]
forester/java/src/org/forester/util/BasicTable.java [new file with mode: 0644]
forester/java/src/org/forester/util/BasicTableParser.java [new file with mode: 0644]
forester/java/src/org/forester/util/CommandLineArguments.java [new file with mode: 0644]
forester/java/src/org/forester/util/CommandProcessBuilder.java [new file with mode: 0644]
forester/java/src/org/forester/util/DescriptiveStatistics.java [new file with mode: 0644]
forester/java/src/org/forester/util/ExternalProgram.java [new file with mode: 0644]
forester/java/src/org/forester/util/FailedConditionCheckException.java [new file with mode: 0644]
forester/java/src/org/forester/util/ForesterConstants.java [new file with mode: 0644]
forester/java/src/org/forester/util/ForesterUtil.java [new file with mode: 0644]
forester/java/src/org/forester/util/GeneralTable.java [new file with mode: 0644]
forester/java/src/org/forester/util/IllegalFormatUseException.java [new file with mode: 0644]
forester/java/src/org/forester/util/SystemCommandExecutor.java [new file with mode: 0644]
forester/java/src/org/forester/util/ThreadedStreamHandler.java [new file with mode: 0644]
forester/java/src/org/forester/util/WindowsUtils.java [new file with mode: 0644]

diff --git a/forester/java/src/org/forester/go/BasicGoRelationship.java b/forester/java/src/org/forester/go/BasicGoRelationship.java
new file mode 100644 (file)
index 0000000..6f8dd32
--- /dev/null
@@ -0,0 +1,131 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public class BasicGoRelationship implements GoRelationship {
+
+    final Type _type;
+    final GoId _go_id;
+
+    public BasicGoRelationship( final String s ) {
+        final String[] sa = s.split( " " );
+        if ( sa.length != 2 ) {
+            throw new IllegalArgumentException( "unexpected format for GO relationship: " + s );
+        }
+        final String type = sa[ 0 ].trim();
+        final String go_id = sa[ 1 ].trim();
+        if ( type.toLowerCase().equals( PART_OF_STR ) ) {
+            _type = Type.PART_OF;
+        }
+        else if ( type.toLowerCase().equals( REGULATES_STR ) ) {
+            _type = Type.REGULATES;
+        }
+        else if ( type.toLowerCase().equals( NEGATIVELY_REGULATES_STR ) ) {
+            _type = Type.NEGATIVELY_REGULATES;
+        }
+        else if ( type.toLowerCase().equals( POSITIVELY_REGULATES_STR ) ) {
+            _type = Type.POSITIVELY_REGULATES;
+        }
+        else {
+            throw new IllegalArgumentException( "unknown GO relationship type: " + type );
+        }
+        _go_id = new GoId( go_id );
+    }
+
+    public BasicGoRelationship( final String type, final String go_id ) {
+        if ( type.toLowerCase().equals( PART_OF_STR ) ) {
+            _type = Type.PART_OF;
+        }
+        else {
+            throw new IllegalArgumentException( "unknown GO relationship type: " + type );
+        }
+        _go_id = new GoId( go_id );
+    }
+
+    public BasicGoRelationship( final Type type, final GoId go_id ) {
+        _type = type;
+        _go_id = go_id;
+    }
+
+    public int compareTo( final GoRelationship rel ) {
+        return getGoId().compareTo( rel.getGoId() );
+    }
+
+    /**
+     * Based on value and type.
+     * 
+     * 
+     */
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check go relationship equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check go relationship equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return getType().equals( ( ( GoRelationship ) o ).getType() )
+                    && getGoId().equals( ( ( GoRelationship ) o ).getGoId() );
+        }
+    }
+
+    public GoId getGoId() {
+        return _go_id;
+    }
+
+    public Type getType() {
+        return _type;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        switch ( getType() ) {
+            case PART_OF:
+                sb.append( PART_OF_STR );
+                break;
+            case NEGATIVELY_REGULATES:
+                sb.append( NEGATIVELY_REGULATES_STR );
+                break;
+            case POSITIVELY_REGULATES:
+                sb.append( POSITIVELY_REGULATES_STR );
+                break;
+            case REGULATES:
+                sb.append( REGULATES_STR );
+                break;
+            default:
+                throw new AssertionError( "unknown type: " + getType() );
+        }
+        sb.append( ": " );
+        sb.append( getGoId().toString() );
+        return sb.toString();
+    }
+}
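
Illustrative usage sketch (not part of the committed sources): the string constructor above expects an OBO-style "type GO-id" value; the relationship type and GO id below are placeholders, and the class name is invented for illustration.

import org.forester.go.BasicGoRelationship;
import org.forester.go.GoRelationship;

public class GoRelationshipSketch {

    public static void main( final String[] args ) {
        // "part_of GO:0000001" is a placeholder OBO-style relationship value.
        final GoRelationship rel = new BasicGoRelationship( "part_of GO:0000001" );
        System.out.println( rel.getType() ); // PART_OF
        System.out.println( rel.getGoId() ); // GO:0000001
        System.out.println( rel );           // part_of: GO:0000001
    }
}
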
diff --git a/forester/java/src/org/forester/go/BasicGoSubset.java b/forester/java/src/org/forester/go/BasicGoSubset.java
new file mode 100644 (file)
index 0000000..19d20ed
--- /dev/null
@@ -0,0 +1,125 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public class BasicGoSubset implements GoSubset {
+
+    final Type _type;
+
+    public BasicGoSubset( final String s ) {
+        final String my_s = s.trim().toLowerCase();
+        if ( my_s.equals( GOSLIM_GENERIC_STR ) ) {
+            _type = Type.GOSLIM_GENERIC;
+        }
+        else if ( my_s.equals( GOSLIM_GOA_STR ) ) {
+            _type = Type.GOSLIM_GOA;
+        }
+        else if ( my_s.equals( GOSLIM_PIR_STR ) ) {
+            _type = Type.GOSLIM_PIR;
+        }
+        else if ( my_s.equals( GOSUBSET_PROK_STR ) ) {
+            _type = Type.GOSUBSET_PROK;
+        }
+        else if ( my_s.equals( GOSLIM_CANDIDA_STR ) ) {
+            _type = Type.GOSLIM_CANDIDA;
+        }
+        else if ( my_s.equals( GOSLIM_PLANT_STR ) ) {
+            _type = Type.GOSLIM_PLANT;
+        }
+        else if ( my_s.equals( GOSLIM_YEAST_STR ) ) {
+            _type = Type.GOSLIM_YEAST;
+        }
+        else if ( my_s.equals( GOSLIM_POMBE_STR ) ) {
+            _type = Type.GOSLIM_POMBE;
+        }
+        else {
+            throw new IllegalArgumentException( "unknown GO subset type: " + my_s );
+        }
+    }
+
+    public BasicGoSubset( final Type type ) {
+        _type = type;
+    }
+
+    public int compareTo( final GoSubset sub ) {
+        return getType().compareTo( sub.getType() );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check go subset equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check go subset equality to " + o + " [" + o.getClass()
+                    + "]" );
+        }
+        else {
+            return ( getType() == ( ( GoSubset ) o ).getType() );
+        }
+    }
+
+    public Type getType() {
+        return _type;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        switch ( getType() ) {
+            case GOSLIM_CANDIDA:
+                sb.append( GOSLIM_CANDIDA_STR );
+                break;
+            case GOSLIM_GENERIC:
+                sb.append( GOSLIM_GENERIC_STR );
+                break;
+            case GOSLIM_GOA:
+                sb.append( GOSLIM_GOA_STR );
+                break;
+            case GOSLIM_PIR:
+                sb.append( GOSLIM_PIR_STR );
+                break;
+            case GOSLIM_PLANT:
+                sb.append( GOSLIM_PLANT_STR );
+                break;
+            case GOSLIM_YEAST:
+                sb.append( GOSLIM_YEAST_STR );
+                break;
+            case GOSUBSET_PROK:
+                sb.append( GOSUBSET_PROK_STR );
+                break;
+            case GOSLIM_POMBE:
+                sb.append( GOSLIM_POMBE_STR );
+                break;
+            default:
+                throw new AssertionError( "unknown type: " + getType() );
+        }
+        return sb.toString();
+    }
+}
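
A similar sketch for BasicGoSubset (again not part of the committed sources), which maps an OBO "subset:" value onto GoSubset.Type; the input string matches one of the constants declared in the GoSubset interface later in this commit.

import org.forester.go.BasicGoSubset;
import org.forester.go.GoSubset;

public class GoSubsetSketch {

    public static void main( final String[] args ) {
        final GoSubset subset = new BasicGoSubset( "goslim_generic" );
        System.out.println( subset.getType() ); // GOSLIM_GENERIC
        System.out.println( subset );           // goslim_generic
    }
}
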
diff --git a/forester/java/src/org/forester/go/BasicGoTerm.java b/forester/java/src/org/forester/go/BasicGoTerm.java
new file mode 100644 (file)
index 0000000..768de1c
--- /dev/null
@@ -0,0 +1,246 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.util.ForesterUtil;
+
+public class BasicGoTerm implements GoTerm {
+
+    private final GoId           _id;
+    private final String         _name;
+    private final boolean        _is_obsolete;
+    private final GoNameSpace    _namespace;
+    private String               _definition;
+    private List<GoId>           _alt_ids;
+    private List<GoId>           _super_go_ids;
+    private List<GoXRef>         _go_xrefs;
+    private List<GoSubset>       _go_subsets;
+    private String               _comment;
+    private List<GoRelationship> _go_relationships;
+
+    public BasicGoTerm( final GoId id, final String name, final GoNameSpace namespace, final boolean is_obsolete ) {
+        if ( ( id == null ) || ForesterUtil.isEmpty( name ) || ( namespace == null ) ) {
+            throw new IllegalArgumentException( "attempt to create GO term with empty id, name, or namespace" );
+        }
+        _id = id;
+        _name = name;
+        _namespace = namespace;
+        _is_obsolete = is_obsolete;
+        init();
+    }
+
+    public BasicGoTerm( final String id, final String name, final String namespace, final boolean is_obsolete ) {
+        if ( ForesterUtil.isEmpty( id ) || ForesterUtil.isEmpty( name ) || ForesterUtil.isEmpty( namespace ) ) {
+            throw new IllegalArgumentException( "attempt to create GO term with empty id, name, or namespace" );
+        }
+        _id = new GoId( id );
+        _name = name;
+        _namespace = new GoNameSpace( namespace );
+        _is_obsolete = is_obsolete;
+        init();
+    }
+
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getGoId().toString() );
+    }
+
+    public StringBuffer asText() {
+        return new StringBuffer( toString() );
+    }
+
+    /**
+     * Compares based on GO id.
+     * 
+     */
+    public int compareTo( final GoTerm go_term ) {
+        return getGoId().compareTo( go_term.getGoId() );
+    }
+
+    /**
+     * Makes a shallow copy.
+     * 
+     * 
+     */
+    public PhylogenyData copy() {
+        final BasicGoTerm gt = new BasicGoTerm( getGoId(), getName(), getGoNameSpace(), isObsolete() );
+        gt.setGoXrefs( getGoXRefs() );
+        gt.setGoSubsets( getGoSubsets() );
+        gt.setSuperTerms( getSuperGoIds() );
+        gt.setAltIds( getAltIds() );
+        gt.setDefinition( getDefinition() );
+        return gt;
+    }
+
+    /**
+     * Return true if both GO id and namespace are equal.
+     * 
+     */
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check go term equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check go term equality to " + o + " [" + o.getClass() + "]" );
+        }
+        else {
+            final GoTerm gt = ( GoTerm ) o;
+            return getGoNameSpace().equals( gt.getGoNameSpace() ) && getGoId().equals( gt.getGoId() );
+        }
+    }
+
+    public List<GoId> getAltIds() {
+        return _alt_ids;
+    }
+
+    @Override
+    public String getComment() {
+        return _comment;
+    }
+
+    @Override
+    public String getDefinition() {
+        return _definition;
+    }
+
+    public GoId getGoId() {
+        return _id;
+    }
+
+    public GoNameSpace getGoNameSpace() {
+        return _namespace;
+    }
+
+    @Override
+    public List<GoRelationship> getGoRelationships() {
+        return _go_relationships;
+    }
+
+    @Override
+    public List<GoSubset> getGoSubsets() {
+        return _go_subsets;
+    }
+
+    public List<GoXRef> getGoXRefs() {
+        return _go_xrefs;
+    }
+
+    public String getName() {
+        return _name;
+    }
+
+    public List<GoId> getSuperGoIds() {
+        return _super_go_ids;
+    }
+
+    /**
+     * Hashcode is based on hashcode of GO id.
+     * 
+     * 
+     */
+    @Override
+    public int hashCode() {
+        return getGoId().hashCode();
+    }
+
+    private void init() {
+        setGoXrefs( new ArrayList<GoXRef>() );
+        setSuperTerms( new ArrayList<GoId>() );
+        setAltIds( new ArrayList<GoId>() );
+        setGoRelationships( new ArrayList<GoRelationship>() );
+        setGoSubsets( new ArrayList<GoSubset>() );
+        setDefinition( "" );
+        setComment( "" );
+    }
+
+    public boolean isEqual( final PhylogenyData go_term ) {
+        return equals( go_term );
+    }
+
+    public boolean isObsolete() {
+        return _is_obsolete;
+    }
+
+    private void setAltIds( final List<GoId> alt_ids ) {
+        _alt_ids = alt_ids;
+    }
+
+    public void setComment( final String comment ) {
+        _comment = comment;
+    }
+
+    public void setDefinition( final String definition ) {
+        _definition = definition;
+    }
+
+    private void setGoRelationships( final List<GoRelationship> go_relationships ) {
+        _go_relationships = go_relationships;
+    }
+
+    public void setGoSubsets( final List<GoSubset> go_subsets ) {
+        _go_subsets = go_subsets;
+    }
+
+    private void setGoXrefs( final List<GoXRef> xrefs ) {
+        _go_xrefs = xrefs;
+    }
+
+    private void setSuperTerms( final List<GoId> super_terms ) {
+        _super_go_ids = super_terms;
+    }
+
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getGoId() );
+        sb.append( ": " );
+        sb.append( getName() );
+        sb.append( " [" );
+        sb.append( getGoNameSpace() );
+        sb.append( "]" );
+        if ( isObsolete() ) {
+            sb.append( " [is obsolete]" );
+        }
+        return sb.toString();
+    }
+}
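
Illustrative sketch of the BasicGoTerm string constructor (not part of the committed sources); the id, name, and namespace are example values in the formats the class requires: "GO:" plus seven digits, a non-empty name, and one of the namespace strings defined in GoNameSpace.

import org.forester.go.BasicGoTerm;
import org.forester.go.GoTerm;

public class GoTermSketch {

    public static void main( final String[] args ) {
        final GoTerm term = new BasicGoTerm( "GO:0000002",
                                             "mitochondrial genome maintenance",
                                             "biological_process",
                                             false );
        // GO:0000002: mitochondrial genome maintenance [biological_process]
        System.out.println( term );
        System.out.println( term.getGoNameSpace().toShortString() ); // B
        System.out.println( term.isObsolete() );                     // false
    }
}
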
diff --git a/forester/java/src/org/forester/go/BasicGoXRef.java b/forester/java/src/org/forester/go/BasicGoXRef.java
new file mode 100644 (file)
index 0000000..8d5b4ae
--- /dev/null
@@ -0,0 +1,182 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public class BasicGoXRef implements GoXRef {
+
+    private final String _xref;
+    private final Type   _type;
+
+    public BasicGoXRef( final String s ) {
+        final String[] sa = s.split( ":" );
+        if ( sa.length < 2 ) {
+            throw new IllegalArgumentException( "unexpected format for GO xref: " + s );
+        }
+        final String type = sa[ 0 ].trim();
+        if ( type.equals( EC_STR ) ) {
+            _type = Type.EC;
+        }
+        else if ( type.equals( META_CYC_STR ) ) {
+            _type = Type.META_CYC;
+        }
+        else if ( type.equals( REACTOME_STR ) ) {
+            _type = Type.REACTOME;
+        }
+        else if ( type.equals( RESID_STR ) ) {
+            _type = Type.RESID;
+        }
+        else if ( type.equals( UM_BBD_ENZYME_ID_STR ) ) {
+            _type = Type.UM_BBD_ENZYME_ID;
+        }
+        else if ( type.equals( UM_BBD_PATHWAY_ID_STR ) ) {
+            _type = Type.UM_BBD_PATHWAY_ID;
+        }
+        else if ( type.equals( UM_BBD_REACTIONID_STR ) ) {
+            _type = Type.UM_BBD_REACTIONID;
+        }
+        else if ( type.equals( TC_STR ) ) {
+            _type = Type.TC;
+        }
+        else if ( type.equals( ARACYC_STR ) ) {
+            _type = Type.ARACYC;
+        }
+        else if ( type.equals( XX_STR ) ) {
+            _type = Type.XX;
+        }
+        else if ( type.equals( PMID_STR ) ) {
+            _type = Type.PMID;
+        }
+        else if ( type.equals( IMG_STR ) ) {
+            _type = Type.IMG;
+        }
+        else if ( type.equals( GOC_STR ) ) {
+            _type = Type.GOC;
+        }
+        else if ( type.equals( KEGG_STR ) ) {
+            _type = Type.KEGG;
+        }
+        else if ( type.equals( WIKIPEDIA_STR ) ) {
+            _type = Type.WIKIPEDIA;
+        }
+        else {
+            throw new IllegalArgumentException( "unknown GO xref type: " + type );
+        }
+        _xref = sa[ 1 ].trim();
+    }
+
+    public BasicGoXRef( final Type type, final String xref ) {
+        _type = type;
+        _xref = xref;
+    }
+
+    public int compareTo( final GoXRef xref ) {
+        return getXRef().compareTo( xref.getXRef() );
+    }
+
+    /**
+     * Based on value and type.
+     * 
+     * 
+     */
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check go xref equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check go xref equality to " + o + " [" + o.getClass() + "]" );
+        }
+        else {
+            return getXRef().equals( ( ( GoXRef ) o ).getXRef() ) && getType().equals( ( ( GoXRef ) o ).getType() );
+        }
+    }
+
+    public Type getType() {
+        return _type;
+    }
+
+    @Override
+    public String getXRef() {
+        return _xref;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        switch ( getType() ) {
+            case EC:
+                sb.append( EC_STR );
+                break;
+            case META_CYC:
+                sb.append( META_CYC_STR );
+                break;
+            case REACTOME:
+                sb.append( REACTOME_STR );
+                break;
+            case RESID:
+                sb.append( RESID_STR );
+                break;
+            case UM_BBD_ENZYME_ID:
+                sb.append( UM_BBD_ENZYME_ID_STR );
+                break;
+            case UM_BBD_PATHWAY_ID:
+                sb.append( UM_BBD_PATHWAY_ID_STR );
+                break;
+            case UM_BBD_REACTIONID:
+                sb.append( UM_BBD_REACTIONID_STR );
+                break;
+            case TC:
+                sb.append( TC_STR );
+                break;
+            case ARACYC:
+                sb.append( ARACYC_STR );
+                break;
+            case XX:
+                sb.append( XX_STR );
+                break;
+            case GOC:
+                sb.append( GOC_STR );
+                break;
+            case IMG:
+                sb.append( IMG_STR );
+                break;
+            case PMID:
+                sb.append( PMID_STR );
+                break;
+            case WIKIPEDIA:
+                sb.append( WIKIPEDIA_STR );
+                break;
+            default:
+                throw new AssertionError( "unknown type: " + getType() );
+        }
+        sb.append( ":" );
+        sb.append( getXRef() );
+        return sb.toString();
+    }
+}
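
Illustrative sketch for BasicGoXRef (not part of the committed sources), which splits a "db:accession" cross-reference on ":"; this assumes the *_STR constants declared in the GoXRef interface (not shown in this excerpt) equal the literal database prefixes, e.g. EC_STR being "EC".

import org.forester.go.BasicGoXRef;

public class GoXRefSketch {

    public static void main( final String[] args ) {
        // Placeholder EC number; assumes GoXRef.EC_STR equals "EC".
        final BasicGoXRef xref = new BasicGoXRef( "EC:2.7.11.1" );
        System.out.println( xref.getType() ); // EC
        System.out.println( xref.getXRef() ); // 2.7.11.1
        System.out.println( xref );           // EC:2.7.11.1
    }
}
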
diff --git a/forester/java/src/org/forester/go/GoId.java b/forester/java/src/org/forester/go/GoId.java
new file mode 100644 (file)
index 0000000..3ba3fe5
--- /dev/null
@@ -0,0 +1,83 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class GoId implements Comparable<GoId> {
+
+    private static final int     SIZE       = 10;
+    private static final String  GO_PREFIX  = "GO:";
+    private static final String  GO_FORMAT  = GO_PREFIX + "\\d{7}";
+    private static final Pattern GO_PATTERN = Pattern.compile( GO_FORMAT );
+    private final String         _id;
+
+    public GoId( final String id ) {
+        if ( id.length() != SIZE ) {
+            throw new IllegalArgumentException( "unexpected format for GO id: " + id );
+        }
+        final Matcher m = GO_PATTERN.matcher( id );
+        if ( !m.matches() ) {
+            throw new IllegalArgumentException( "unexpected format for GO id: " + id );
+        }
+        _id = id.substring( 3 );
+    }
+
+    public int compareTo( final GoId id ) {
+        return getId().compareTo( id.getId() );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check go id equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check go id equality to " + o + " [" + o.getClass() + "]" );
+        }
+        else {
+            return getId().equals( ( ( GoId ) o ).getId() );
+        }
+    }
+
+    public String getId() {
+        return GO_PREFIX + _id;
+    }
+
+    @Override
+    public int hashCode() {
+        return getId().hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return getId();
+    }
+}
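
GoId accepts only strings of the form "GO:" followed by exactly seven digits (ten characters in total) and rejects anything else. A small sketch with placeholder ids (not part of the committed sources):

import org.forester.go.GoId;

public class GoIdSketch {

    public static void main( final String[] args ) {
        final GoId id = new GoId( "GO:0008150" ); // well-formed placeholder id
        System.out.println( id );                 // GO:0008150
        try {
            new GoId( "GO:8150" );                // wrong length, rejected
        }
        catch ( final IllegalArgumentException e ) {
            System.out.println( e.getMessage() ); // unexpected format for GO id: GO:8150
        }
    }
}
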
diff --git a/forester/java/src/org/forester/go/GoNameSpace.java b/forester/java/src/org/forester/go/GoNameSpace.java
new file mode 100644 (file)
index 0000000..fa76552
--- /dev/null
@@ -0,0 +1,141 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public class GoNameSpace {
+
+    public final String           MOLECULAR_FUNCTION_STR = "molecular_function";
+    public final String           BIOLOGICAL_PROCESS_STR = "biological_process";
+    public final String           CELLULAR_COMPONENT_STR = "cellular_component";
+    public final String           UNASSIGNED_STR         = "unassigned";
+    private final GoNamespaceType _type;
+
+    public GoNameSpace( final GoNamespaceType type ) {
+        _type = type;
+    }
+
+    public GoNameSpace( final String type ) {
+        if ( type.toLowerCase().equals( MOLECULAR_FUNCTION_STR ) ) {
+            _type = GoNamespaceType.MOLECULAR_FUNCTION;
+        }
+        else if ( type.toLowerCase().equals( BIOLOGICAL_PROCESS_STR ) ) {
+            _type = GoNamespaceType.BIOLOGICAL_PROCESS;
+        }
+        else if ( type.toLowerCase().equals( CELLULAR_COMPONENT_STR ) ) {
+            _type = GoNamespaceType.CELLULAR_COMPONENT;
+        }
+        else if ( type.toLowerCase().equals( UNASSIGNED_STR ) ) {
+            _type = GoNamespaceType.UNASSIGNED;
+        }
+        else {
+            throw new IllegalArgumentException( "unknown GO namespace: " + type );
+        }
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( ( o == null ) || ( o.getClass() != this.getClass() ) ) {
+            return false;
+        }
+        else {
+            return getType() == ( ( GoNameSpace ) o ).getType();
+        }
+    }
+
+    public GoNamespaceType getType() {
+        return _type;
+    }
+
+    public boolean isBiologicalProcess() {
+        return getType() == GoNamespaceType.BIOLOGICAL_PROCESS;
+    }
+
+    public boolean isCellularComponent() {
+        return getType() == GoNamespaceType.CELLULAR_COMPONENT;
+    }
+
+    public boolean isMolecularFunction() {
+        return getType() == GoNamespaceType.MOLECULAR_FUNCTION;
+    }
+
+    public boolean isUnassigned() {
+        return getType() == GoNamespaceType.UNASSIGNED;
+    }
+
+    public String toShortString() {
+        switch ( _type ) {
+            case BIOLOGICAL_PROCESS:
+                return ( "B" );
+            case CELLULAR_COMPONENT:
+                return ( "C" );
+            case MOLECULAR_FUNCTION:
+                return ( "M" );
+            case UNASSIGNED:
+                return ( "?" );
+            default:
+                throw new RuntimeException();
+        }
+    }
+
+    @Override
+    public String toString() {
+        switch ( _type ) {
+            case BIOLOGICAL_PROCESS:
+                return ( BIOLOGICAL_PROCESS_STR );
+            case CELLULAR_COMPONENT:
+                return ( CELLULAR_COMPONENT_STR );
+            case MOLECULAR_FUNCTION:
+                return ( MOLECULAR_FUNCTION_STR );
+            case UNASSIGNED:
+                return ( UNASSIGNED_STR );
+            default:
+                throw new RuntimeException();
+        }
+    }
+
+    public static GoNameSpace createBiologicalProcess() {
+        return new GoNameSpace( GoNamespaceType.BIOLOGICAL_PROCESS );
+    }
+
+    public static GoNameSpace createCellularComponent() {
+        return new GoNameSpace( GoNamespaceType.CELLULAR_COMPONENT );
+    }
+
+    public static GoNameSpace createMolecularFunction() {
+        return new GoNameSpace( GoNamespaceType.MOLECULAR_FUNCTION );
+    }
+
+    public static GoNameSpace createUnassigned() {
+        return new GoNameSpace( GoNamespaceType.UNASSIGNED );
+    }
+
+    public static enum GoNamespaceType {
+        MOLECULAR_FUNCTION, BIOLOGICAL_PROCESS, CELLULAR_COMPONENT, UNASSIGNED;
+    }
+}
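
GoNameSpace can be constructed either from an OBO "namespace:" string or via its static factory methods, and two instances are equal when their GoNamespaceType matches. A brief sketch (not part of the committed sources):

import org.forester.go.GoNameSpace;

public class GoNameSpaceSketch {

    public static void main( final String[] args ) {
        final GoNameSpace from_string = new GoNameSpace( "biological_process" );
        final GoNameSpace from_factory = GoNameSpace.createBiologicalProcess();
        System.out.println( from_string.equals( from_factory ) );            // true
        System.out.println( from_string.toShortString() );                   // B
        System.out.println( GoNameSpace.createUnassigned().isUnassigned() ); // true
    }
}
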
diff --git a/forester/java/src/org/forester/go/GoRelationship.java b/forester/java/src/org/forester/go/GoRelationship.java
new file mode 100644 (file)
index 0000000..d7f5e79
--- /dev/null
@@ -0,0 +1,42 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public interface GoRelationship extends Comparable<GoRelationship> {
+
+    public static final String PART_OF_STR              = "part_of";
+    public static final String REGULATES_STR            = "regulates";
+    public static final String NEGATIVELY_REGULATES_STR = "negatively_regulates";
+    public static final String POSITIVELY_REGULATES_STR = "positively_regulates";
+
+    public GoId getGoId();
+
+    public Type getType();
+
+    public static enum Type {
+        PART_OF, REGULATES, NEGATIVELY_REGULATES, POSITIVELY_REGULATES;
+    }
+}
diff --git a/forester/java/src/org/forester/go/GoSubset.java b/forester/java/src/org/forester/go/GoSubset.java
new file mode 100644 (file)
index 0000000..c963217
--- /dev/null
@@ -0,0 +1,44 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public interface GoSubset extends Comparable<GoSubset> {
+
+    public static final String GOSLIM_GENERIC_STR = "goslim_generic";
+    public static final String GOSLIM_GOA_STR     = "goslim_goa";
+    public static final String GOSLIM_PIR_STR     = "goslim_pir";
+    public static final String GOSUBSET_PROK_STR  = "gosubset_prok";
+    public static final String GOSLIM_CANDIDA_STR = "goslim_candida";
+    public static final String GOSLIM_PLANT_STR   = "goslim_plant";
+    public static final String GOSLIM_YEAST_STR   = "goslim_yeast";
+    public static final String GOSLIM_POMBE_STR   = "goslim_pombe";
+
+    public Type getType();
+
+    public static enum Type {
+        GOSLIM_GENERIC, GOSLIM_GOA, GOSLIM_PIR, GOSUBSET_PROK, GOSLIM_CANDIDA, GOSLIM_PLANT, GOSLIM_YEAST, GOSLIM_POMBE;
+    }
+}
diff --git a/forester/java/src/org/forester/go/GoTerm.java b/forester/java/src/org/forester/go/GoTerm.java
new file mode 100644 (file)
index 0000000..7068ab6
--- /dev/null
@@ -0,0 +1,55 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import java.util.List;
+
+import org.forester.phylogeny.data.PhylogenyData;
+
+public interface GoTerm extends PhylogenyData, Comparable<GoTerm> {
+
+    public List<GoId> getAltIds();
+
+    public String getComment();
+
+    public String getDefinition();
+
+    public GoId getGoId();
+
+    public GoNameSpace getGoNameSpace();
+
+    public List<GoRelationship> getGoRelationships();
+
+    public List<GoSubset> getGoSubsets();
+
+    public List<GoXRef> getGoXRefs();
+
+    public String getName();
+
+    public List<GoId> getSuperGoIds();
+
+    public boolean isObsolete();
+}
diff --git a/forester/java/src/org/forester/go/GoUtils.java b/forester/java/src/org/forester/go/GoUtils.java
new file mode 100644 (file)
index 0000000..680489a
--- /dev/null
@@ -0,0 +1,221 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.util.ForesterUtil;
+
+public final class GoUtils {
+
+    private GoUtils() {
+    }
+
+    /**
+     * Counts how many times each GO term in 'categories' is a (direct or
+     * indirect) super term of the GO terms in 'experiment_set'; a term also
+     * counts as a super term of itself (see the usage sketch after this
+     * method).
+     *
+     * @param categories the list of super terms to be counted
+     * @param experiment_set the list of GO terms to be analyzed
+     * @param all_go_terms all terms in the ontology
+     * @return a map from the GO id of each category to the number of terms in
+     *         'experiment_set' of which it is a super term
+     */
+    public static LinkedHashMap<GoId, Integer> countCategories( final List<GoTerm> categories,
+                                                                final List<GoTerm> experiment_set,
+                                                                final Map<GoId, GoTerm> all_go_terms ) {
+        final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
+        for( final GoTerm experiment_term : experiment_set ) {
+            final Set<GoTerm> super_terms = getAllSuperGoTerms( experiment_term.getGoId(), all_go_terms );
+            super_terms.add( experiment_term );
+            for( final GoTerm cat : categories ) {
+                if ( !counts.containsKey( cat.getGoId() ) ) {
+                    counts.put( cat.getGoId(), 0 );
+                }
+                if ( super_terms.contains( cat ) ) {
+                    counts.put( cat.getGoId(), 1 + counts.get( cat.getGoId() ) );
+                }
+            }
+        }
+        return counts;
+    }
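+    // Usage sketch for countCategories (hypothetical; the file name and the
+    // variable names 'categories' and 'experiment_set' are illustrative only):
+    //
+    //   final OBOparser parser = new OBOparser( new File( "gene_ontology_edit.obo" ),
+    //                                           OBOparser.ReturnType.BASIC_GO_TERM );
+    //   final List<GoTerm> all_terms = parser.parse();
+    //   final Map<GoId, GoTerm> id_to_term = GoUtils.createGoIdToGoTermMap( all_terms );
+    //   final LinkedHashMap<GoId, Integer> counts =
+    //           GoUtils.countCategories( categories, experiment_set, id_to_term );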
+
+    public static LinkedHashMap<GoId, Integer> countCategoriesId( final List<GoId> categories,
+                                                                  final List<GoId> experiment_set,
+                                                                  final Map<GoId, GoTerm> all_go_terms ) {
+        final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
+        for( final GoId experiment_id : experiment_set ) {
+            final Set<GoId> super_ids = new HashSet<GoId>();
+            for( final GoTerm term : getAllSuperGoTerms( experiment_id, all_go_terms ) ) {
+                super_ids.add( term.getGoId() );
+            }
+            super_ids.add( experiment_id );
+            for( final GoId cat : categories ) {
+                if ( !counts.containsKey( cat ) ) {
+                    counts.put( cat, 0 );
+                }
+                if ( super_ids.contains( cat ) ) {
+                    counts.put( cat, 1 + counts.get( cat ) );
+                }
+            }
+        }
+        return counts;
+    }
+
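+    // Maps both the primary GO id and all alternative ids ("alt_id") of each
+    // term to that term, so that lookups by an alternative id resolve to the
+    // same GoTerm instance.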
+    public static Map<GoId, GoTerm> createGoIdToGoTermMap( final List<GoTerm> go_terms ) {
+        final Map<GoId, GoTerm> go_id_to_term_map = new HashMap<GoId, GoTerm>();
+        for( final GoTerm go_term : go_terms ) {
+            go_id_to_term_map.put( go_term.getGoId(), go_term );
+            for( final GoId alt_id : go_term.getAltIds() ) {
+                go_id_to_term_map.put( alt_id, go_term );
+            }
+        }
+        return go_id_to_term_map;
+    }
+
+    public static SortedSet<GoId> getAllSuperGoIds( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
+        final SortedSet<GoId> ids = new TreeSet<GoId>();
+        final SortedSet<GoTerm> terms = GoUtils.getAllSuperGoTerms( go_id, goid_to_term_map );
+        for( final GoTerm term : terms ) {
+            ids.add( term.getGoId() );
+        }
+        return ids;
+    }
+
+    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final List<GoTerm> go_terms ) {
+        final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
+        return getAllSuperGoTerms( go_id, goid_to_term_map );
+    }
+
+    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
+        if ( !goid_to_term_map.containsKey( go_id ) ) {
+            throw new IllegalArgumentException( "GO id [" + go_id + "] not found in GO id to term map" );
+        }
+        final GoTerm go_term = goid_to_term_map.get( go_id );
+        return getAllSuperGoTerms( go_term, goid_to_term_map );
+    }
+
+    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoTerm go_term, final Map<GoId, GoTerm> goid_to_term_map ) {
+        final SortedSet<GoTerm> supers = new TreeSet<GoTerm>();
+        getAllSuperGoTerms( go_term, goid_to_term_map, supers );
+        return supers;
+    }
+
+    private static void getAllSuperGoTerms( final GoTerm go_term,
+                                            final Map<GoId, GoTerm> goid_to_term_map,
+                                            final Set<GoTerm> supers ) {
+        if ( ( go_term.getSuperGoIds() != null ) && ( go_term.getSuperGoIds().size() > 0 ) ) {
+            for( final GoId super_go_id : go_term.getSuperGoIds() ) {
+                if ( !goid_to_term_map.containsKey( super_go_id ) ) {
+                    throw new IllegalArgumentException( "GO id [" + super_go_id + "] not found in GO id to term map" );
+                }
+                final GoTerm super_go_term = goid_to_term_map.get( super_go_id );
+                supers.add( super_go_term );
+                getAllSuperGoTerms( super_go_term, goid_to_term_map, supers );
+            }
+        }
+    }
+
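+    // Note: getPenultimateGoTerm and getUltimateGoTerm follow only the first
+    // listed super GO id at each level; alternative parents are ignored.
+    // getUltimateGoTerm returns the last term reached on that path (a root),
+    // getPenultimateGoTerm the term directly below it.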
+    public static GoTerm getPenultimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
+        GoTerm my_go_term = go_term;
+        GoTerm penultimate = my_go_term;
+        while ( ( my_go_term.getSuperGoIds() != null ) && ( my_go_term.getSuperGoIds().size() > 0 ) ) {
+            penultimate = my_go_term;
+            if ( !map.containsKey( my_go_term.getSuperGoIds().get( 0 ) ) ) {
+                throw new IllegalArgumentException( "GO-id [" + my_go_term.getSuperGoIds().get( 0 )
+                        + "] not found in map" );
+            }
+            my_go_term = map.get( my_go_term.getSuperGoIds().get( 0 ) );
+        }
+        return penultimate;
+    }
+
+    public static GoTerm getUltimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
+        GoTerm my_go_term = go_term;
+        while ( ( my_go_term.getSuperGoIds() != null ) && ( my_go_term.getSuperGoIds().size() > 0 ) ) {
+            if ( !map.containsKey( my_go_term.getSuperGoIds().get( 0 ) ) ) {
+                throw new IllegalArgumentException( "GO-id [" + my_go_term.getSuperGoIds().get( 0 )
+                        + "] not found in map" );
+            }
+            my_go_term = map.get( my_go_term.getSuperGoIds().get( 0 ) );
+        }
+        return my_go_term;
+    }
+
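+    // Reads lists of GO ids from a plain-text source: one id per line (only
+    // the first whitespace-separated token is used), optionally grouped under
+    // label lines and interspersed with comment lines. Note that
+    // 'start_of_label_line' is used verbatim as part of a regular expression,
+    // so regex metacharacters in it are not escaped.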
+    public static SortedMap<String, List<GoId>> parseGoIds( final Object source,
+                                                            final String start_of_comment_line,
+                                                            final String start_of_label_line ) throws IOException {
+        final Pattern label_matcher = Pattern.compile( start_of_label_line + "\\s*(.+?)" );
+        final BufferedReader reader = ForesterUtil.obtainReader( source );
+        final SortedMap<String, List<GoId>> results = new TreeMap<String, List<GoId>>();
+        String line = "";
+        String label = "";
+        final boolean use_label = !ForesterUtil.isEmpty( start_of_label_line );
+        final boolean use_comment = !ForesterUtil.isEmpty( start_of_comment_line );
+        List<GoId> current_list = new ArrayList<GoId>();
+        while ( ( line = reader.readLine() ) != null ) {
+            line = line.trim();
+            if ( ForesterUtil.isEmpty( line ) || ( use_comment && line.startsWith( start_of_comment_line ) ) ) {
+                continue;
+            }
+            else if ( use_label && line.startsWith( start_of_label_line ) ) {
+                final Matcher matcher = label_matcher.matcher( line );
+                if ( matcher.matches() ) {
+                    if ( !ForesterUtil.isEmpty( label ) ) {
+                        results.put( label, current_list );
+                        current_list = new ArrayList<GoId>();
+                    }
+                    label = matcher.group( 1 );
+                }
+            }
+            else {
+                final String[] s = line.split( "\\s+" );
+                final GoId id = new GoId( s[ 0 ] );
+                current_list.add( id );
+            }
+        }
+        if ( ForesterUtil.isEmpty( label ) ) {
+            label = "";
+        }
+        results.put( label, current_list );
+        reader.close();
+        return results;
+    }
+}
diff --git a/forester/java/src/org/forester/go/GoXRef.java b/forester/java/src/org/forester/go/GoXRef.java
new file mode 100644 (file)
index 0000000..fe96c4b
--- /dev/null
@@ -0,0 +1,67 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public interface GoXRef extends Comparable<GoXRef> {
+
+    public static final String EC_STR                = "EC";
+    public static final String META_CYC_STR          = "MetaCyc";
+    public static final String REACTOME_STR          = "Reactome";
+    public static final String RESID_STR             = "RESID";
+    public static final String UM_BBD_ENZYME_ID_STR  = "UM-BBD_enzymeID";
+    public static final String UM_BBD_PATHWAY_ID_STR = "UM-BBD_pathwayID";
+    public static final String UM_BBD_REACTIONID_STR = "UM-BBD_reactionID";
+    public static final String TC_STR                = "TC";
+    public static final String ARACYC_STR            = "AraCyc";
+    public static final String XX_STR                = "XX";
+    public static final String PMID_STR              = "PMID";
+    public static final String IMG_STR               = "IMG";
+    public static final String GOC_STR               = "GOC";
+    public static final String WIKIPEDIA_STR         = "Wikipedia";
+    public static final String KEGG_STR              = "KEGG";
+
+    public Type getType();
+
+    public String getXRef();
+
+    public static enum Type {
+        EC,
+        META_CYC,
+        REACTOME,
+        RESID,
+        UM_BBD_ENZYME_ID,
+        UM_BBD_PATHWAY_ID,
+        UM_BBD_REACTIONID,
+        TC,
+        ARACYC,
+        XX,
+        PMID,
+        IMG,
+        GOC,
+        WIKIPEDIA,
+        KEGG;
+    }
+}
diff --git a/forester/java/src/org/forester/go/Mapping.java b/forester/java/src/org/forester/go/Mapping.java
new file mode 100644 (file)
index 0000000..9a2fbb5
--- /dev/null
@@ -0,0 +1,33 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+public interface Mapping extends Comparable<Mapping> {
+
+    public Object getKey();
+
+    public GoId getValue();
+}
diff --git a/forester/java/src/org/forester/go/OBOparser.java b/forester/java/src/org/forester/go/OBOparser.java
new file mode 100644 (file)
index 0000000..b79e172
--- /dev/null
@@ -0,0 +1,271 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.forester.util.ForesterUtil;
+
+public class OBOparser {
+
+    private final File       _input_file;
+    private final ReturnType _return_type;
+    private int              _go_term_count;
+
+    public OBOparser( final File input_file, final ReturnType return_type ) {
+        switch ( return_type ) {
+            case BASIC_GO_TERM:
+                break;
+            default:
+                throw new IllegalArgumentException( "unknown return type: " + return_type );
+        }
+        _input_file = input_file;
+        _return_type = return_type;
+        init();
+    }
+
+    private GoTerm createNewBasicGoTerm( final String id,
+                                         final String name,
+                                         final String namespace,
+                                         final String is_obsolete,
+                                         final String comment,
+                                         final String definition,
+                                         final Set<String> alt_ids,
+                                         final List<GoXRef> go_xrefs,
+                                         final List<GoId> super_go_ids,
+                                         final List<GoRelationship> go_relationships,
+                                         final List<GoSubset> go_subsets ) {
+        final GoTerm gt = new BasicGoTerm( id, name, namespace, is_obsolete.trim().toLowerCase().equals( "true" ) );
+        ( ( BasicGoTerm ) gt ).setComment( comment );
+        ( ( BasicGoTerm ) gt ).setDefinition( definition );
+        for( final GoXRef x : go_xrefs ) {
+            gt.getGoXRefs().add( x );
+        }
+        for( final GoId s : super_go_ids ) {
+            gt.getSuperGoIds().add( s );
+        }
+        for( final GoRelationship r : go_relationships ) {
+            gt.getGoRelationships().add( r );
+        }
+        for( final GoSubset sub : go_subsets ) {
+            gt.getGoSubsets().add( sub );
+        }
+        for( final String alt_id : alt_ids ) {
+            gt.getAltIds().add( new GoId( alt_id ) );
+        }
+        ++_go_term_count;
+        return gt;
+    }
+
+    private void createNewGoTerm( final List<GoTerm> go_terms,
+                                  final String id,
+                                  final String name,
+                                  final String namespace,
+                                  final String is_obsolete,
+                                  final String comment,
+                                  final String definition,
+                                  final Set<String> alt_ids,
+                                  final List<GoXRef> go_xrefs,
+                                  final List<GoId> super_go_ids,
+                                  final List<GoRelationship> go_relationships,
+                                  final List<GoSubset> go_subsets ) {
+        GoTerm gt;
+        switch ( getReturnType() ) {
+            case BASIC_GO_TERM:
+                gt = createNewBasicGoTerm( id,
+                                           name,
+                                           namespace,
+                                           is_obsolete,
+                                           comment,
+                                           definition,
+                                           alt_ids,
+                                           go_xrefs,
+                                           super_go_ids,
+                                           go_relationships,
+                                           go_subsets );
+                break;
+            default:
+                throw new AssertionError( "unknown return type: " + getReturnType() );
+        }
+        go_terms.add( gt );
+    }
+
+    public int getGoTermCount() {
+        return _go_term_count;
+    }
+
+    private File getInputFile() {
+        return _input_file;
+    }
+
+    private ReturnType getReturnType() {
+        return _return_type;
+    }
+
+    private void init() {
+        setGoTermCount( 0 );
+    }
+
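+    // Parses the "[Term]" stanzas of an OBO flat file; the keys recognized
+    // below are id, name, namespace, alt_id, def, comment, is_obsolete, xref,
+    // is_a, relationship, and subset. A minimal stanza looks roughly like this
+    // (illustrative example, not taken from this commit):
+    //
+    //   [Term]
+    //   id: GO:0000001
+    //   name: mitochondrion inheritance
+    //   namespace: biological_process
+    //   is_a: GO:0048308 ! organelle inheritance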
+    public List<GoTerm> parse() throws IOException {
+        final String error = ForesterUtil.isReadableFile( getInputFile() );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            throw new IOException( error );
+        }
+        final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
+        String line;
+        final List<GoTerm> go_terms = new ArrayList<GoTerm>();
+        int line_number = 0;
+        boolean in_term = false;
+        String id = "";
+        String name = "";
+        String namespace = "";
+        String def = "";
+        String comment = "";
+        String is_obsolete = "";
+        HashSet<String> alt_ids = new HashSet<String>();
+        List<GoId> super_go_ids = new ArrayList<GoId>();
+        List<GoXRef> go_xrefs = new ArrayList<GoXRef>();
+        List<GoRelationship> go_relationships = new ArrayList<GoRelationship>();
+        List<GoSubset> go_subsets = new ArrayList<GoSubset>();
+        try {
+            while ( ( line = br.readLine() ) != null ) {
+                line_number++;
+                line = line.trim();
+                if ( line.length() < 1 ) {
+                    if ( in_term ) {
+                        in_term = false;
+                    }
+                }
+                else if ( line.startsWith( "[Term]" ) ) {
+                    in_term = true;
+                    if ( id.length() > 0 ) {
+                        createNewGoTerm( go_terms,
+                                         id,
+                                         name,
+                                         namespace,
+                                         is_obsolete,
+                                         comment,
+                                         def,
+                                         alt_ids,
+                                         go_xrefs,
+                                         super_go_ids,
+                                         go_relationships,
+                                         go_subsets );
+                    }
+                    id = "";
+                    name = "";
+                    namespace = "";
+                    alt_ids = new HashSet<String>();
+                    def = "";
+                    comment = "";
+                    is_obsolete = "";
+                    super_go_ids = new ArrayList<GoId>();
+                    go_xrefs = new ArrayList<GoXRef>();
+                    go_relationships = new ArrayList<GoRelationship>();
+                    go_subsets = new ArrayList<GoSubset>();
+                }
+                else if ( in_term && line.startsWith( "id:" ) ) {
+                    id = line.substring( 3 ).trim();
+                }
+                else if ( in_term && line.startsWith( "name:" ) ) {
+                    name = line.substring( 5 ).trim();
+                }
+                else if ( in_term && line.startsWith( "namespace:" ) ) {
+                    namespace = line.substring( 10 ).trim();
+                }
+                else if ( in_term && line.startsWith( "alt_id:" ) ) {
+                    alt_ids.add( line.substring( 7 ).trim() );
+                }
+                else if ( in_term && line.startsWith( "def:" ) ) {
+                    def = line.substring( 4 ).trim();
+                }
+                else if ( in_term && line.startsWith( "is_obsolete:" ) ) {
+                    is_obsolete = line.substring( 12 ).trim();
+                }
+                else if ( in_term && line.startsWith( "comment:" ) ) {
+                    comment = line.substring( 8 ).trim();
+                }
+                else if ( in_term && line.startsWith( "xref:" ) ) {
+                    final String s = trimOffComment( line.substring( 5 ).trim() );
+                    go_xrefs.add( new BasicGoXRef( s ) );
+                }
+                else if ( in_term && line.startsWith( "is_a:" ) ) {
+                    final String s = trimOffComment( line.substring( 5 ).trim() );
+                    super_go_ids.add( new GoId( s ) );
+                }
+                else if ( in_term && line.startsWith( "relationship:" ) ) {
+                    final String s = trimOffComment( line.substring( 13 ).trim() );
+                    go_relationships.add( new BasicGoRelationship( s ) );
+                }
+                else if ( in_term && line.startsWith( "subset:" ) ) {
+                    final String s = line.substring( 7 ).trim();
+                    go_subsets.add( new BasicGoSubset( s ) );
+                }
+            } // while ( ( line = br.readLine() ) != null )
+        }
+        catch ( final Exception e ) {
+            throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" );
+        }
+        if ( id.length() > 0 ) {
+            createNewGoTerm( go_terms,
+                             id,
+                             name,
+                             namespace,
+                             is_obsolete,
+                             comment,
+                             def,
+                             alt_ids,
+                             go_xrefs,
+                             super_go_ids,
+                             go_relationships,
+                             go_subsets );
+        }
+        return go_terms;
+    }
+
+    private void setGoTermCount( final int go_term_count ) {
+        _go_term_count = go_term_count;
+    }
+
+    private String trimOffComment( String xref ) {
+        final int i = xref.indexOf( '!' );
+        if ( i > 0 ) {
+            xref = xref.substring( 0, xref.indexOf( '!' ) ).trim();
+        }
+        return xref;
+    }
+
+    public static enum ReturnType {
+        BASIC_GO_TERM
+    }
+}
diff --git a/forester/java/src/org/forester/go/PfamToGoMapping.java b/forester/java/src/org/forester/go/PfamToGoMapping.java
new file mode 100644 (file)
index 0000000..93ee62a
--- /dev/null
@@ -0,0 +1,89 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import org.forester.surfacing.DomainId;
+
+public class PfamToGoMapping implements Mapping {
+
+    private final DomainId _pfam_domain_id;
+    private final GoId     _go_id;
+
+    public PfamToGoMapping( final DomainId pfam_domain_id, final GoId go_id ) {
+        _pfam_domain_id = pfam_domain_id;
+        _go_id = go_id;
+    }
+
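+    /**
+     * Compares by key (the Pfam domain id) only; note that, in contrast,
+     * equals() also takes the GO id value into account.
+     */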
+    @Override
+    public int compareTo( final Mapping m ) {
+        if ( this == m ) {
+            return 0;
+        }
+        return getKey().compareTo( ( DomainId ) m.getKey() );
+    }
+
+    /**
+     * Equality is based on both the key (the Pfam domain id) and the value
+     * (the GO id).
+     */
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check pfam to go mapping equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check pfam to go mapping equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return getKey().equals( ( ( PfamToGoMapping ) o ).getKey() )
+                    && getValue().equals( ( ( PfamToGoMapping ) o ).getValue() );
+        }
+    }
+
+    @Override
+    public DomainId getKey() {
+        return _pfam_domain_id;
+    }
+
+    @Override
+    public GoId getValue() {
+        return _go_id;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getKey().toString() );
+        sb.append( " > " );
+        sb.append( getValue().toString() );
+        return sb.toString();
+    }
+}
diff --git a/forester/java/src/org/forester/go/PfamToGoParser.java b/forester/java/src/org/forester/go/PfamToGoParser.java
new file mode 100644 (file)
index 0000000..11a6fc4
--- /dev/null
@@ -0,0 +1,100 @@
+
+package org.forester.go;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.surfacing.DomainId;
+import org.forester.util.ForesterUtil;
+
+public class PfamToGoParser {
+
+    // Pfam:PF00001 7tm_1 > GO:rhodopsin-like receptor activity ; GO:0001584
+    private static final String  PFAM_TO_GO_FORMAT     = "Pfam:\\S+\\s+(\\S+)\\s*>\\s*GO:.+;\\s*(\\S+)";
+    private static final Pattern PFAM_TO_GO_PATTERN    = Pattern.compile( PFAM_TO_GO_FORMAT );
+    private static final String  PFAMACC_TO_GO_FORMAT  = "Pfam:(\\S+)\\s+\\S+\\s*>\\s*GO:.+;\\s*(\\S+)";
+    private static final Pattern PFAMACC_TO_GO_PATTERN = Pattern.compile( PFAMACC_TO_GO_FORMAT );
+    private final File           _input_file;
+    private int                  _mapping_count;
+    private boolean              _use_acc;
+
+    public PfamToGoParser( final File input_file ) {
+        _input_file = input_file;
+        init();
+    }
+
+    private File getInputFile() {
+        return _input_file;
+    }
+
+    public int getMappingCount() {
+        return _mapping_count;
+    }
+
+    private void init() {
+        setMappingCount( 0 );
+        setUseAccessors( false );
+    }
+
+    public boolean isUseAccessors() {
+        return _use_acc;
+    }
+
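+    // Usage sketch (hypothetical; the file name is illustrative only):
+    //
+    //   final PfamToGoParser p = new PfamToGoParser( new File( "pfam2go" ) );
+    //   p.setUseAccessors( true ); // match Pfam accessions (e.g. PF00001)
+    //                              // instead of Pfam names (e.g. 7tm_1)
+    //   final List<PfamToGoMapping> mappings = p.parse();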
+    public List<PfamToGoMapping> parse() throws IOException {
+        final String error = ForesterUtil.isReadableFile( getInputFile() );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            throw new IOException( error );
+        }
+        final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
+        String line;
+        final List<PfamToGoMapping> mappings = new ArrayList<PfamToGoMapping>();
+        int line_number = 0;
+        try {
+            while ( ( line = br.readLine() ) != null ) {
+                line_number++;
+                line = line.trim();
+                if ( ( line.length() > 0 ) && !line.startsWith( "!" ) ) {
+                    Matcher m = null;
+                    if ( isUseAccessors() ) {
+                        m = PFAMACC_TO_GO_PATTERN.matcher( line );
+                    }
+                    else {
+                        m = PFAM_TO_GO_PATTERN.matcher( line );
+                    }
+                    if ( !m.matches() ) {
+                        throw new IOException( "unexpected format [\"" + line + "\"]" );
+                    }
+                    if ( m.groupCount() != 2 ) {
+                        throw new IOException( "unexpected format [\"" + line + "\"]" );
+                    }
+                    final String pfam = m.group( 1 );
+                    final String go = m.group( 2 );
+                    if ( ForesterUtil.isEmpty( pfam ) || ForesterUtil.isEmpty( go ) ) {
+                        throw new IOException( "unexpected format [\"" + line + "\"]" );
+                    }
+                    final PfamToGoMapping map = new PfamToGoMapping( new DomainId( pfam ), new GoId( go ) );
+                    ++_mapping_count;
+                    mappings.add( map );
+                }
+            } // while ( ( line = br.readLine() ) != null )
+        }
+        catch ( final Exception e ) {
+            throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" );
+        }
+        return mappings;
+    }
+
+    private void setMappingCount( final int mapping_count ) {
+        _mapping_count = mapping_count;
+    }
+
+    public void setUseAccessors( final boolean use_ids ) {
+        _use_acc = use_ids;
+    }
+}
diff --git a/forester/java/src/org/forester/go/TestGo.java b/forester/java/src/org/forester/go/TestGo.java
new file mode 100644 (file)
index 0000000..4fdcd29
--- /dev/null
@@ -0,0 +1,698 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.go;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+
+import org.forester.surfacing.DomainId;
+import org.forester.util.ForesterUtil;
+
+public class TestGo {
+
+    private final static double ZERO_DIFF = 1.0E-9;
+
+    public static boolean isEqual( final double a, final double b ) {
+        return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
+    }
+
+    public static boolean test( final File test_dir ) {
+        System.out.print( "  GO ID: " );
+        if ( !testGoId() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Namespace: " );
+        if ( !testNamespace() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Basic GO term: " );
+        if ( !testBasicGoTerm() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  OBO parser: " );
+        if ( !testOBOparser( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Pfam to GO mapping: " );
+        if ( !testPfamToGoMapping() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Pfam to GO parser: " );
+        if ( !testPfamToGoParser( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Super terms: " );
+        if ( !testSuperTermGetting( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Super term counting: " );
+        if ( !testSuperTermCounting( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        return true;
+    }
+
+    private static boolean testBasicGoTerm() {
+        try {
+            final GoTerm gt1 = new BasicGoTerm( "GO:0047579",
+                                                "4-hydroxymandelate oxidase activity",
+                                                "molecular_function",
+                                                false );
+            final GoTerm gt2 = new BasicGoTerm( "GO:0047579",
+                                                "4-hydroxymandelate oxidase activity",
+                                                "molecular_function",
+                                                false );
+            final GoTerm gt3 = new BasicGoTerm( "GO:0047579", "?", "molecular_function", true );
+            final GoTerm gt4 = new BasicGoTerm( "GO:0047579",
+                                                "4-hydroxymandelate oxidase activity",
+                                                "biological_process",
+                                                false );
+            final GoTerm gt5 = new BasicGoTerm( "GO:0047578",
+                                                "4-hydroxymandelate oxidase activity",
+                                                "molecular_function",
+                                                false );
+            if ( !gt1.equals( gt2 ) ) {
+                return false;
+            }
+            if ( !gt1.equals( gt3 ) ) {
+                return false;
+            }
+            if ( gt1.equals( gt4 ) ) {
+                return false;
+            }
+            if ( gt1.hashCode() != gt4.hashCode() ) {
+                return false;
+            }
+            if ( gt1.equals( gt5 ) ) {
+                return false;
+            }
+            final GoTerm gt6 = ( GoTerm ) gt5.copy();
+            if ( !gt6.equals( gt5 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testGoId() {
+        try {
+            final GoId id1 = new GoId( "GO:0042617" );
+            final GoId id2 = new GoId( "GO:0042630" );
+            final GoId id3 = new GoId( "GO:0042630" );
+            if ( id1.equals( id2 ) ) {
+                return false;
+            }
+            if ( !id2.equals( id3 ) ) {
+                return false;
+            }
+            if ( !id1.toString().equals( "GO:0042617" ) ) {
+                return false;
+            }
+            if ( id2.hashCode() != id3.hashCode() ) {
+                return false;
+            }
+            if ( id1.hashCode() == id2.hashCode() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testNamespace() {
+        try {
+            final GoNameSpace b = new GoNameSpace( "Biological_process" );
+            final GoNameSpace c = new GoNameSpace( "Cellular_Component" );
+            final GoNameSpace m = new GoNameSpace( "molecular_function" );
+            final GoNameSpace m2 = new GoNameSpace( GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION );
+            if ( b.equals( c ) ) {
+                return false;
+            }
+            if ( !m.equals( m2 ) ) {
+                return false;
+            }
+            if ( !b.toString().equals( "biological_process" ) ) {
+                return false;
+            }
+            if ( !c.toString().equals( "cellular_component" ) ) {
+                return false;
+            }
+            if ( !m.toString().equals( "molecular_function" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testOBOparser( final File test_dir ) {
+        try {
+            final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator() + "obo_test" ),
+                                                    OBOparser.ReturnType.BASIC_GO_TERM );
+            final List<GoTerm> go_terms = parser.parse();
+            if ( parser.getGoTermCount() != 26 ) {
+                return false;
+            }
+            final GoTerm g0 = go_terms.get( 0 );
+            final GoTerm g1 = go_terms.get( 1 );
+            final GoTerm g3 = go_terms.get( 2 );
+            final GoTerm g2 = go_terms.get( 25 );
+            if ( !g0.getComment().equals( "" ) ) {
+                return false;
+            }
+            if ( !g0
+                    .getDefinition()
+                    .equals( "\"The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton.\" [GOC:mcc, PMID:10873824, PMID:11389764]" ) ) {
+                return false;
+            }
+            if ( !g0.getGoId().getId().equals( "GO:0000001" ) ) {
+                return false;
+            }
+            if ( g0.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
+                return false;
+            }
+            if ( g0.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
+                return false;
+            }
+            if ( g0.getGoRelationships().size() != 0 ) {
+                return false;
+            }
+            if ( g0.getGoXRefs().size() != 0 ) {
+                return false;
+            }
+            if ( !g0.getName().equals( "mitochondrion inheritance" ) ) {
+                return false;
+            }
+            if ( g0.getSuperGoIds().size() != 2 ) {
+                return false;
+            }
+            if ( !g0.isObsolete() ) {
+                return false;
+            }
+            if ( !g1.getComment().equals( "comment" ) ) {
+                return false;
+            }
+            if ( !g1
+                    .getDefinition()
+                    .equals( "\"The maintenance of the structure and integrity of the mitochondrial genome.\" [GOC:ai]" ) ) {
+                return false;
+            }
+            if ( !g1.getGoId().getId().equals( "GO:0000002" ) ) {
+                return false;
+            }
+            if ( g1.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
+                return false;
+            }
+            if ( g1.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
+                return false;
+            }
+            if ( g1.getGoRelationships().size() != 1 ) {
+                return false;
+            }
+            if ( g1.getGoXRefs().size() != 5 ) {
+                return false;
+            }
+            if ( !g1.getName().equals( "mitochondrial genome maintenance" ) ) {
+                return false;
+            }
+            if ( g1.getSuperGoIds().size() != 1 ) {
+                return false;
+            }
+            if ( g1.isObsolete() ) {
+                return false;
+            }
+            if ( !g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.-" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoXRefs().get( 0 ).getXRef().equals( "2.4.1.-" ) ) {
+                return false;
+            }
+            if ( g1.getGoXRefs().get( 0 ).getType() != GoXRef.Type.EC ) {
+                return false;
+            }
+            if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.1" ) ) ) {
+                return false;
+            }
+            if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "Reactome:2.4.1.-" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoXRefs().get( 1 ).equals( new BasicGoXRef( "Reactome:7672" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoXRefs().get( 2 ).equals( new BasicGoXRef( "MetaCyc:SIROHEME-FERROCHELAT-RXN" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoXRefs().get( 3 ).equals( new BasicGoXRef( "RESID:AA02376" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoXRefs().get( 4 ).equals( new BasicGoXRef( "UM-BBD_enzymeID:e0271" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:0007052" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoRelationships().get( 0 ).getGoId().equals( new GoId( "GO:0007052" ) ) ) {
+                return false;
+            }
+            if ( !g1.getGoRelationships().get( 0 ).getGoId().getId().equals( "GO:0007052" ) ) {
+                return false;
+            }
+            if ( g1.getGoRelationships().get( 0 ).getType() != GoRelationship.Type.PART_OF ) {
+                return false;
+            }
+            if ( g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:1007052" ) ) ) {
+                return false;
+            }
+            if ( !g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:0007005" ) ) ) {
+                return false;
+            }
+            if ( g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:1007005" ) ) ) {
+                return false;
+            }
+            if ( !g2.getGoId().getId().equals( "GO:0000030" ) ) {
+                return false;
+            }
+            if ( !g2.getGoId().equals( new GoId( "GO:0000030" ) ) ) {
+                return false;
+            }
+            if ( g2.getGoId().getId().equals( "GO:0000031" ) ) {
+                return false;
+            }
+            if ( g2.getGoId().equals( new GoId( "GO:0000031" ) ) ) {
+                return false;
+            }
+            if ( g3.getGoSubsets().size() != 3 ) {
+                return false;
+            }
+            if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_generic" ) ) ) {
+                return false;
+            }
+            if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_plant" ) ) ) {
+                return false;
+            }
+            if ( !g3.getGoSubsets().contains( new BasicGoSubset( "gosubset_prok" ) ) ) {
+                return false;
+            }
+            if ( g3.getGoSubsets().contains( new BasicGoSubset( "goslim_candida" ) ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testPfamToGoMapping() {
+        try {
+            final PfamToGoMapping pg0 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000001" ) );
+            final PfamToGoMapping pg1 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000001" ) );
+            final PfamToGoMapping pg2 = new PfamToGoMapping( new DomainId( "B" ), new GoId( "GO:0000001" ) );
+            final PfamToGoMapping pg3 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000002" ) );
+            final PfamToGoMapping pg4 = new PfamToGoMapping( new DomainId( "B" ), new GoId( "GO:0000002" ) );
+            if ( !pg0.equals( pg0 ) ) {
+                return false;
+            }
+            if ( !pg0.equals( pg1 ) ) {
+                return false;
+            }
+            if ( pg0.equals( pg2 ) ) {
+                return false;
+            }
+            if ( pg0.equals( pg3 ) ) {
+                return false;
+            }
+            if ( pg0.equals( pg4 ) ) {
+                return false;
+            }
+            if ( pg0.compareTo( pg3 ) != 0 ) {
+                return false;
+            }
+            if ( pg0.compareTo( pg2 ) >= 0 ) {
+                return false;
+            }
+            if ( pg2.compareTo( pg0 ) <= 0 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testPfamToGoParser( final File test_dir ) {
+        try {
+            final PfamToGoParser parser = new PfamToGoParser( new File( test_dir + ForesterUtil.getFileSeparator()
+                    + "pfam_to_go_test" ) );
+            final List<PfamToGoMapping> mappings = parser.parse();
+            if ( parser.getMappingCount() != 426 ) {
+                return false;
+            }
+            if ( mappings.size() != 426 ) {
+                return false;
+            }
+            final PfamToGoMapping m0 = mappings.get( 0 );
+            final PfamToGoMapping m1 = mappings.get( 1 );
+            final PfamToGoMapping m2 = mappings.get( 2 );
+            final PfamToGoMapping m3 = mappings.get( 3 );
+            final PfamToGoMapping m4 = mappings.get( 4 );
+            final PfamToGoMapping m5 = mappings.get( 5 );
+            final PfamToGoMapping m424 = mappings.get( 424 );
+            final PfamToGoMapping m425 = mappings.get( 425 );
+            if ( !m0.getKey().equals( new DomainId( "7tm_1" ) ) ) {
+                return false;
+            }
+            if ( !m0.getValue().equals( new GoId( "GO:0001584" ) ) ) {
+                return false;
+            }
+            if ( m0.getKey().equals( new DomainId( "7tm_x" ) ) ) {
+                return false;
+            }
+            if ( m0.getValue().equals( new GoId( "GO:0001585" ) ) ) {
+                return false;
+            }
+            if ( !m1.getKey().equals( new DomainId( "7tm_1" ) ) ) {
+                return false;
+            }
+            if ( !m1.getValue().equals( new GoId( "GO:0007186" ) ) ) {
+                return false;
+            }
+            if ( !m2.getKey().equals( new DomainId( "7tm_1" ) ) ) {
+                return false;
+            }
+            if ( !m2.getValue().equals( new GoId( "GO:0016021" ) ) ) {
+                return false;
+            }
+            if ( !m3.getKey().equals( new DomainId( "7tm_2" ) ) ) {
+                return false;
+            }
+            if ( !m3.getValue().equals( new GoId( "GO:0004930" ) ) ) {
+                return false;
+            }
+            if ( !m4.getKey().equals( new DomainId( "7tm_2" ) ) ) {
+                return false;
+            }
+            if ( !m4.getValue().equals( new GoId( "GO:0016020" ) ) ) {
+                return false;
+            }
+            if ( !m5.getKey().equals( new DomainId( "7tm_3" ) ) ) {
+                return false;
+            }
+            if ( !m5.getValue().equals( new GoId( "GO:0008067" ) ) ) {
+                return false;
+            }
+            if ( !m424.getKey().equals( new DomainId( "OMPdecase" ) ) ) {
+                return false;
+            }
+            if ( !m424.getValue().equals( new GoId( "GO:0006207" ) ) ) {
+                return false;
+            }
+            if ( !m425.getKey().equals( new DomainId( "Bac_DNA_binding" ) ) ) {
+                return false;
+            }
+            if ( !m425.getValue().equals( new GoId( "GO:0003677" ) ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testSuperTermCounting( final File test_dir ) {
+        try {
+            final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
+                    + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
+            final List<GoTerm> all_go_terms = parser.parse();
+            if ( parser.getGoTermCount() != 27748 ) {
+                return false;
+            }
+            final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
+            final List<GoTerm> categories = new ArrayList<GoTerm>();
+            final List<GoTerm> experiment_set = new ArrayList<GoTerm>();
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0005690" ), "snRNP U4atac", GoNameSpace
+                    .createUnassigned(), false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0009698" ),
+                                                 "phenylpropanoid metabolic process",
+                                                 GoNameSpace.createUnassigned(),
+                                                 false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
+                    .createUnassigned(), false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0006915" ),
+                                                 "apoptosis",
+                                                 GoNameSpace.createUnassigned(),
+                                                 false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0001783" ), "B cell apoptosis", GoNameSpace
+                    .createUnassigned(), false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
+                    .createUnassigned(), false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
+                    .createUnassigned(), false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0010658" ),
+                                                 "striated muscle cell apoptosis",
+                                                 GoNameSpace.createUnassigned(),
+                                                 false ) );
+            experiment_set.add( new BasicGoTerm( new GoId( "GO:0043065" ),
+                                                 "positive regulation of apoptosis",
+                                                 GoNameSpace.createUnassigned(),
+                                                 false ) );
+            categories
+                    .add( new BasicGoTerm( new GoId( "GO:0016265" ), "death", GoNameSpace.createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0006915" ),
+                                             "apoptosis",
+                                             GoNameSpace.createUnassigned(),
+                                             false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0010658" ), "striated muscle cell apoptosis", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0046242" ), "o-xylene biosynthetic process", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0016326" ), "kinesin motor activity", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0005575" ), "cellular_component", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0032502" ), "developmental process", GoNameSpace
+                    .createUnassigned(), false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0051094" ),
+                                             "positive regulation of developmental process",
+                                             GoNameSpace.createUnassigned(),
+                                             false ) );
+            categories.add( new BasicGoTerm( new GoId( "GO:0048522" ),
+                                             "positive regulation of cellular process",
+                                             GoNameSpace.createUnassigned(),
+                                             false ) );
+            final Map<GoId, Integer> counts = GoUtils.countCategories( categories, experiment_set, goid_to_term_map );
+            // death
+            if ( counts.get( new GoId( "GO:0016265" ) ) != 5 ) {
+                return false;
+            }
+            // apoptosis
+            if ( counts.get( new GoId( "GO:0006915" ) ) != 5 ) {
+                return false;
+            }
+            // biological_process
+            if ( counts.get( new GoId( "GO:0008150" ) ) != 8 ) {
+                return false;
+            }
+            // muscle cell apoptosis
+            if ( counts.get( new GoId( "GO:0010657" ) ) != 3 ) {
+                return false;
+            }
+            // striated muscle cell apoptosis
+            if ( counts.get( new GoId( "GO:0010658" ) ) != 1 ) {
+                return false;
+            }
+            // o-xylene biosynthetic process
+            if ( counts.get( new GoId( "GO:0046242" ) ) != 0 ) {
+                return false;
+            }
+            // kinesin motor activity
+            if ( counts.get( new GoId( "GO:0016326" ) ) != 0 ) {
+                return false;
+            }
+            // cellular_component
+            if ( counts.get( new GoId( "GO:0005575" ) ) != 1 ) {
+                return false;
+            }
+            // developmental process
+            if ( counts.get( new GoId( "GO:0032502" ) ) != 5 ) {
+                return false;
+            }
+            // positive regulation of developmental process
+            if ( counts.get( new GoId( "GO:0051094" ) ) != 1 ) {
+                return false;
+            }
+            // positive regulation of cellular process
+            if ( counts.get( new GoId( "GO:0048522" ) ) != 1 ) {
+                return false;
+            }
+            final List<GoId> categories_id = new ArrayList<GoId>();
+            final List<GoId> experiment_set_id = new ArrayList<GoId>();
+            experiment_set_id.add( new GoId( "GO:0005690" ) );
+            experiment_set_id.add( new GoId( "GO:0009698" ) );
+            experiment_set_id.add( new GoId( "GO:0008150" ) );
+            experiment_set_id.add( new GoId( "GO:0006915" ) );
+            experiment_set_id.add( new GoId( "GO:0001783" ) );
+            experiment_set_id.add( new GoId( "GO:0010657" ) );
+            experiment_set_id.add( new GoId( "GO:0010657" ) );
+            experiment_set_id.add( new GoId( "GO:0010658" ) );
+            categories_id.add( new GoId( "GO:0016265" ) );
+            categories_id.add( new GoId( "GO:0006915" ) );
+            categories_id.add( new GoId( "GO:0008150" ) );
+            categories_id.add( new GoId( "GO:0010657" ) );
+            categories_id.add( new GoId( "GO:0010658" ) );
+            categories_id.add( new GoId( "GO:0046242" ) );
+            categories_id.add( new GoId( "GO:0016326" ) );
+            categories_id.add( new GoId( "GO:0005575" ) );
+            final Map<GoId, Integer> counts_id = GoUtils.countCategoriesId( categories_id,
+                                                                            experiment_set_id,
+                                                                            goid_to_term_map );
+            // death
+            if ( counts_id.get( new GoId( "GO:0016265" ) ) != 5 ) {
+                return false;
+            }
+            // apoptosis
+            if ( counts_id.get( new GoId( "GO:0006915" ) ) != 5 ) {
+                return false;
+            }
+            // biological_process
+            if ( counts_id.get( new GoId( "GO:0008150" ) ) != 7 ) {
+                return false;
+            }
+            // muscle cell apoptosis
+            if ( counts_id.get( new GoId( "GO:0010657" ) ) != 3 ) {
+                return false;
+            }
+            // striated muscle cell apoptosis
+            if ( counts_id.get( new GoId( "GO:0010658" ) ) != 1 ) {
+                return false;
+            }
+            // o-xylene biosynthetic process
+            if ( counts_id.get( new GoId( "GO:0046242" ) ) != 0 ) {
+                return false;
+            }
+            // kinesin motor activity
+            if ( counts_id.get( new GoId( "GO:0016326" ) ) != 0 ) {
+                return false;
+            }
+            // cellular_component
+            if ( counts_id.get( new GoId( "GO:0005575" ) ) != 1 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
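+    // Tests GoUtils.getAllSuperGoTerms(): collects all (transitive) super terms of a GO term from the test OBO file.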
+    private static boolean testSuperTermGetting( final File test_dir ) {
+        try {
+            final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
+                    + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
+            final List<GoTerm> go_terms = parser.parse();
+            if ( parser.getGoTermCount() != 27748 ) {
+                return false;
+            }
+            final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
+            final SortedSet<GoTerm> b_cell_selection = GoUtils.getAllSuperGoTerms( new GoId( "GO:0002339" ),
+                                                                                   goid_to_term_map );
+            if ( b_cell_selection.size() != 2 ) {
+                return false;
+            }
+            if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0002376" ),
+                                                              "immune system process",
+                                                              GoNameSpace.createBiologicalProcess(),
+                                                              false ) ) ) {
+                return false;
+            }
+            if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0008150" ),
+                                                              "biological process",
+                                                              GoNameSpace.createBiologicalProcess(),
+                                                              false ) ) ) {
+                return false;
+            }
+            final SortedSet<GoTerm> b_cell_differentation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0030183" ),
+                                                                                        goid_to_term_map );
+            if ( b_cell_differentation.size() != 12 ) {
+                return false;
+            }
+            final SortedSet<GoTerm> biological_process = GoUtils.getAllSuperGoTerms( new GoId( "GO:0008150" ),
+                                                                                     goid_to_term_map );
+            if ( biological_process.size() != 0 ) {
+                return false;
+            }
+            final SortedSet<GoTerm> protein_aa_phosphorylation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0006468" ),
+                                                                                             goid_to_term_map );
+            if ( protein_aa_phosphorylation.size() != 16 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+}
diff --git a/forester/java/src/org/forester/go/etc/MetaOntologizer.java b/forester/java/src/org/forester/go/etc/MetaOntologizer.java
new file mode 100644 (file)
index 0000000..970f939
--- /dev/null
@@ -0,0 +1,639 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.go.etc;
+
+import java.awt.Color;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Writer;
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.go.GoId;
+import org.forester.go.GoNameSpace;
+import org.forester.go.GoTerm;
+import org.forester.go.GoUtils;
+import org.forester.go.OBOparser;
+import org.forester.go.PfamToGoMapping;
+import org.forester.surfacing.BasicSpecies;
+import org.forester.surfacing.DomainId;
+import org.forester.surfacing.Species;
+import org.forester.surfacing.SurfacingConstants;
+import org.forester.surfacing.SurfacingUtil;
+import org.forester.util.ForesterUtil;
+
+public class MetaOntologizer {
+
+    private final static NumberFormat FORMATER                         = new DecimalFormat( "0.00E0" );
+    private final static Color        MIN_COLOR                        = new Color( 0, 200, 50 );
+    private final static Color        MAX_COLOR                        = new Color( 0, 0, 0 );
+    final static private String       PRG_NAME                         = "meta_ontologizer";
+    private static final boolean      VERBOSE                          = true;
+    // Example of an expected Ontologizer output file name: table-a_41_dollo_all_gains_d-Topology-Elim-Bonferroni.txt
+    private final static Pattern      PATTERN_ONTOLOGIZER_TABLE_OUTPUT = Pattern.compile( ".*table-(.+)_dollo_.*",
+                                                                                          Pattern.CASE_INSENSITIVE ); //TODO this might need some work...
+
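+    // Returns true if at least one Ontologizer result for the given species maps to a GO term of the given namespace.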
+    private static boolean hasResultsForSpecies( final Map<GoId, GoTerm> go_id_to_terms,
+                                                 final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
+                                                 final String species,
+                                                 final GoNameSpace.GoNamespaceType namespace ) {
+        for( final OntologizerResult ontologizer_result : species_to_results_map.get( species ) ) {
+            if ( go_id_to_terms.get( ontologizer_result.getGoId() ).getGoNameSpace().getType() == namespace ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
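+    // Builds HTML links for all domains of a species whose Pfam-to-GO mapping (directly or via a GO super term)
+    // matches the query GO id; matching domain ids are also added to found_domain_ids.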
+    private static StringBuilder obtainDomainsForGoId( final List<PfamToGoMapping> pfam_to_go,
+                                                       final SortedSet<DomainId> domains_per_species,
+                                                       final Map<GoId, GoTerm> all_go_terms,
+                                                       final GoId query_go_id,
+                                                       final Set<DomainId> found_domain_ids ) {
+        final StringBuilder sb = new StringBuilder();
+        D: for( final DomainId domain_id : domains_per_species ) {
+            for( final PfamToGoMapping ptg : pfam_to_go ) {
+                if ( ptg.getKey().equals( domain_id ) ) {
+                    final GoId go_id = ptg.getValue();
+                    final Set<GoId> super_ids = new HashSet<GoId>();
+                    for( final GoTerm term : GoUtils.getAllSuperGoTerms( go_id, all_go_terms ) ) {
+                        super_ids.add( term.getGoId() );
+                    }
+                    super_ids.add( go_id );
+                    if ( super_ids.contains( query_go_id ) ) {
+                        sb.append( "[<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id + "\">"
+                                + domain_id + "</a>] " );
+                        found_domain_ids.add( domain_id );
+                        continue D;
+                    }
+                }
+            }
+        }
+        return sb;
+    }
+
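+    // Extracts the species name from an Ontologizer output file name using PATTERN_ONTOLOGIZER_TABLE_OUTPUT.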
+    private static String obtainSpecies( final File ontologizer_outfile ) {
+        final Matcher matcher = PATTERN_ONTOLOGIZER_TABLE_OUTPUT.matcher( ontologizer_outfile.getName() );
+        String species = null;
+        if ( matcher.matches() ) {
+            species = matcher.group( 1 );
+            if ( VERBOSE ) {
+                ForesterUtil
+                        .programMessage( PRG_NAME, "species for [" + ontologizer_outfile + "] is [" + species + "]" );
+            }
+        }
+        else {
+            throw new RuntimeException( "pattern [" + PATTERN_ONTOLOGIZER_TABLE_OUTPUT + "] did not match ["
+                    + ontologizer_outfile.getName() + "]" );
+        }
+        return species;
+    }
+
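+    // Parses a domain gain/loss file: a line starting with "#" begins a new species block, "##" and empty lines
+    // are ignored, and all other lines are read as domain ids of the current species.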
+    private static SortedMap<Species, SortedSet<DomainId>> parseDomainGainLossFile( final File input )
+            throws IOException {
+        final String error = ForesterUtil.isReadableFile( input );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            throw new IOException( error );
+        }
+        final SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = new TreeMap<Species, SortedSet<DomainId>>();
+        final BufferedReader br = new BufferedReader( new FileReader( input ) );
+        String line;
+        int line_number = 0;
+        Species current_species = null;
+        try {
+            while ( ( line = br.readLine() ) != null ) {
+                line_number++;
+                line = line.trim();
+                if ( ( ForesterUtil.isEmpty( line ) ) || ( line.startsWith( "##" ) ) ) {
+                    // Ignore.
+                }
+                else if ( line.startsWith( "#" ) ) {
+                    current_species = new BasicSpecies( line.substring( 1 ) );
+                    speciesto_to_domain_id.put( current_species, new TreeSet<DomainId>() );
+                }
+                else {
+                    if ( current_species == null ) {
+                        throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]" );
+                    }
+                    speciesto_to_domain_id.get( current_species ).add( new DomainId( line ) );
+                }
+            }
+        }
+        catch ( final Exception e ) {
+            throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]: "
+                    + e.getMessage() );
+        }
+        return speciesto_to_domain_id;
+    }
+
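+    // Writes the (already p-value filtered) Ontologizer results of one species to the tab and HTML writers of the
+    // matching GO namespace (biological process, cellular component, or molecular function).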
+    private static void processOneSpecies( final Map<GoId, GoTerm> go_id_to_terms,
+                                           final Writer b_html_writer,
+                                           final Writer b_tab_writer,
+                                           final Writer c_html_writer,
+                                           final Writer c_tab_writer,
+                                           final Writer m_html_writer,
+                                           final Writer m_tab_writer,
+                                           final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map,
+                                           final String species,
+                                           final double p_adjusted_upper_limit,
+                                           final SortedSet<DomainId> domains_per_species,
+                                           final List<PfamToGoMapping> pfam_to_go,
+                                           final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+        final SortedSet<OntologizerResult> ontologizer_results = species_to_results_map.get( species );
+        for( final OntologizerResult ontologizer_result : ontologizer_results ) {
+            final GoTerm go_term = go_id_to_terms.get( ontologizer_result.getGoId() );
+            Writer current_html_writer = b_html_writer;
+            Writer current_tab_writer = b_tab_writer;
+            switch ( go_term.getGoNameSpace().getType() ) {
+                case CELLULAR_COMPONENT:
+                    current_html_writer = c_html_writer;
+                    current_tab_writer = c_tab_writer;
+                    break;
+                case MOLECULAR_FUNCTION:
+                    current_html_writer = m_html_writer;
+                    current_tab_writer = m_tab_writer;
+                    break;
+            }
+            writeValuesToTabWriter( species, ontologizer_result, go_term, current_tab_writer );
+            writeValuesToHtmlWriter( ontologizer_result,
+                                     go_term,
+                                     current_html_writer,
+                                     p_adjusted_upper_limit,
+                                     species,
+                                     go_id_to_terms,
+                                     domains_per_species,
+                                     pfam_to_go,
+                                     domain_ids_with_go_annot );
+        }
+    }
+
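+    // Reads all Ontologizer outfiles starting with result_file_prefix from ontologizer_outdir, keeps results with
+    // adjusted P values <= p_adjusted_upper_limit, and writes per GO namespace HTML and tab-delimited summaries
+    // (file suffixes _B, _C, and _M).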
+    public static void reformat( final File ontologizer_outdir,
+                                 final String result_file_prefix,
+                                 final File domain_gain_loss_file,
+                                 final String outfile_base,
+                                 final File obo_file,
+                                 final double p_adjusted_upper_limit,
+                                 final String comment,
+                                 final List<PfamToGoMapping> pfam_to_go ) throws IOException {
+        if ( !ontologizer_outdir.exists() ) {
+            throw new IllegalArgumentException( "[" + ontologizer_outdir + "] does not exist" );
+        }
+        if ( !ontologizer_outdir.isDirectory() ) {
+            throw new IllegalArgumentException( "[" + ontologizer_outdir + "] is not a directory" );
+        }
+        if ( !obo_file.exists() ) {
+            throw new IllegalArgumentException( "[" + obo_file + "] does not exist" );
+        }
+        if ( ( p_adjusted_upper_limit < 0.0 ) || ( p_adjusted_upper_limit > 1.0 ) ) {
+            throw new IllegalArgumentException( "adjusted P values limit [" + p_adjusted_upper_limit
+                    + "] is out of range" );
+        }
+        SortedMap<Species, SortedSet<DomainId>> speciesto_to_domain_id = null;
+        if ( domain_gain_loss_file != null ) {
+            if ( !domain_gain_loss_file.exists() ) {
+                throw new IllegalArgumentException( "[" + domain_gain_loss_file + "] does not exist" );
+            }
+            speciesto_to_domain_id = parseDomainGainLossFile( domain_gain_loss_file );
+            if ( VERBOSE ) {
+                ForesterUtil.programMessage( PRG_NAME, "parsed gain/loss domains for " + speciesto_to_domain_id.size()
+                        + " species from [" + domain_gain_loss_file + "]" );
+            }
+        }
+        final String[] children = ontologizer_outdir.list();
+        final List<File> ontologizer_outfiles = new ArrayList<File>();
+        if ( children == null ) {
+            throw new IllegalArgumentException( "problem with [" + ontologizer_outdir + "]" );
+        }
+        else {
+            for( final String filename : children ) {
+                if ( filename.startsWith( result_file_prefix ) ) {
+                    ontologizer_outfiles.add( new File( filename ) );
+                }
+            }
+        }
+        if ( VERBOSE ) {
+            ForesterUtil.programMessage( PRG_NAME, "need to analyze " + ontologizer_outfiles.size()
+                    + " Ontologizer outfiles from [" + ontologizer_outdir + "]" );
+        }
+        final OBOparser parser = new OBOparser( obo_file, OBOparser.ReturnType.BASIC_GO_TERM );
+        final List<GoTerm> go_terms = parser.parse();
+        if ( VERBOSE ) {
+            ForesterUtil.programMessage( PRG_NAME, "parsed " + go_terms.size() + " GO terms from [" + obo_file + "]" );
+        }
+        final Map<GoId, GoTerm> go_id_to_terms = GoUtils.createGoIdToGoTermMap( go_terms );
+        // FIXME not needed? When does this error arise?
+        //   if ( go_id_to_terms.size() != go_terms.size() ) {
+        //       throw new IllegalArgumentException( "GO terms with non-unique ids found" );
+        //   }
+        final String b_file_html = outfile_base + "_B.html";
+        final String b_file_txt = outfile_base + "_B.txt";
+        final String m_file_html = outfile_base + "_M.html";
+        final String m_file_txt = outfile_base + "_M.txt";
+        final String c_file_html = outfile_base + "_C.html";
+        final String c_file_txt = outfile_base + "_C.txt";
+        final Writer b_html_writer = ForesterUtil.createBufferedWriter( b_file_html );
+        final Writer b_tab_writer = ForesterUtil.createBufferedWriter( b_file_txt );
+        final Writer c_html_writer = ForesterUtil.createBufferedWriter( c_file_html );
+        final Writer c_tab_writer = ForesterUtil.createBufferedWriter( c_file_txt );
+        final Writer m_html_writer = ForesterUtil.createBufferedWriter( m_file_html );
+        final Writer m_tab_writer = ForesterUtil.createBufferedWriter( m_file_txt );
+        final SortedMap<String, SortedSet<OntologizerResult>> species_to_results_map = new TreeMap<String, SortedSet<OntologizerResult>>();
+        for( final File ontologizer_outfile : ontologizer_outfiles ) {
+            final String species = obtainSpecies( ontologizer_outfile );
+            final List<OntologizerResult> ontologizer_results = OntologizerResult.parse( new File( ontologizer_outdir
+                    + ForesterUtil.FILE_SEPARATOR + ontologizer_outfile ) );
+            final SortedSet<OntologizerResult> filtered_ontologizer_results = new TreeSet<OntologizerResult>();
+            for( final OntologizerResult ontologizer_result : ontologizer_results ) {
+                if ( ontologizer_result.getPAdjusted() <= p_adjusted_upper_limit ) {
+                    filtered_ontologizer_results.add( ontologizer_result );
+                }
+            }
+            species_to_results_map.put( species, filtered_ontologizer_results );
+        }
+        writeLabelsToTabWriter( b_tab_writer );
+        writeLabelsToTabWriter( c_tab_writer );
+        writeLabelsToTabWriter( m_tab_writer );
+        String domain_gain_loss_file_full_path_str = null;
+        if ( domain_gain_loss_file != null ) {
+            domain_gain_loss_file_full_path_str = domain_gain_loss_file.getAbsolutePath();
+        }
+        writeHtmlHeader( b_html_writer,
+                         GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS.toString() + " | Pmax = "
+                                 + p_adjusted_upper_limit + " | " + comment,
+                         ontologizer_outdir.getAbsolutePath(),
+                         domain_gain_loss_file_full_path_str );
+        writeHtmlHeader( c_html_writer,
+                         GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT.toString() + " | Pmax = "
+                                 + p_adjusted_upper_limit + " | " + comment,
+                         ontologizer_outdir.getAbsolutePath(),
+                         domain_gain_loss_file_full_path_str );
+        writeHtmlHeader( m_html_writer,
+                         GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION.toString() + " | Pmax = "
+                                 + p_adjusted_upper_limit + " | " + comment,
+                         ontologizer_outdir.getAbsolutePath(),
+                         domain_gain_loss_file_full_path_str );
+        for( final String species : species_to_results_map.keySet() ) {
+            if ( hasResultsForSpecies( go_id_to_terms,
+                                       species_to_results_map,
+                                       species,
+                                       GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
+                writeHtmlSpecies( b_html_writer, species );
+            }
+            if ( hasResultsForSpecies( go_id_to_terms,
+                                       species_to_results_map,
+                                       species,
+                                       GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT ) ) {
+                writeHtmlSpecies( c_html_writer, species );
+            }
+            if ( hasResultsForSpecies( go_id_to_terms,
+                                       species_to_results_map,
+                                       species,
+                                       GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
+                writeHtmlSpecies( m_html_writer, species );
+            }
+            SortedSet<DomainId> domains_per_species = null;
+            if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
+                domains_per_species = speciesto_to_domain_id.get( new BasicSpecies( species ) );
+            }
+            final Set<DomainId> domain_ids_with_go_annot = new HashSet<DomainId>();
+            processOneSpecies( go_id_to_terms,
+                               b_html_writer,
+                               b_tab_writer,
+                               c_html_writer,
+                               c_tab_writer,
+                               m_html_writer,
+                               m_tab_writer,
+                               species_to_results_map,
+                               species,
+                               p_adjusted_upper_limit,
+                               domains_per_species,
+                               pfam_to_go,
+                               domain_ids_with_go_annot );
+            if ( ( speciesto_to_domain_id != null ) && ( speciesto_to_domain_id.size() > 0 ) ) {
+                if ( hasResultsForSpecies( go_id_to_terms,
+                                           species_to_results_map,
+                                           species,
+                                           GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
+                    writeHtmlDomains( b_html_writer, domains_per_species, domain_ids_with_go_annot );
+                }
+                if ( hasResultsForSpecies( go_id_to_terms,
+                                           species_to_results_map,
+                                           species,
+                                           GoNameSpace.GoNamespaceType.CELLULAR_COMPONENT ) ) {
+                    writeHtmlDomains( c_html_writer, domains_per_species, domain_ids_with_go_annot );
+                }
+                if ( hasResultsForSpecies( go_id_to_terms,
+                                           species_to_results_map,
+                                           species,
+                                           GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION ) ) {
+                    writeHtmlDomains( m_html_writer, domains_per_species, domain_ids_with_go_annot );
+                }
+            }
+        }
+        writeHtmlEnd( b_html_writer );
+        writeHtmlEnd( c_html_writer );
+        writeHtmlEnd( m_html_writer );
+        b_html_writer.close();
+        b_tab_writer.close();
+        c_html_writer.close();
+        c_tab_writer.close();
+        m_html_writer.close();
+        m_tab_writer.close();
+        if ( VERBOSE ) {
+            ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_html
+                    + "]" );
+            ForesterUtil.programMessage( PRG_NAME, "successfully wrote biological process summary to [" + b_file_txt
+                    + "]" );
+            ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_html
+                    + "]" );
+            ForesterUtil.programMessage( PRG_NAME, "successfully wrote molecular function summary to [" + m_file_txt
+                    + "]" );
+            ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_html
+                    + "]" );
+            ForesterUtil.programMessage( PRG_NAME, "successfully wrote cellular component summary to [" + c_file_txt
+                    + "]" );
+        }
+    }
+
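+    // Writes a table row linking all domains of the species which are not in domain_ids_with_go_annot.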
+    private static void writeHtmlDomains( final Writer writer,
+                                          final SortedSet<DomainId> domains,
+                                          final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+        writer.write( "<tr>" );
+        writer.write( "<td colspan=\"10\">" );
+        if ( domains != null ) {
+            for( final DomainId domain : domains ) {
+                if ( !domain_ids_with_go_annot.contains( domain ) ) {
+                    writer.write( "[<a class=\"new_type\" href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain
+                            + "\">" + domain + "</a>] " );
+                }
+            }
+        }
+        writer.write( "</td>" );
+        writer.write( "</tr>" );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
+    private static void writeHtmlEnd( final Writer writer ) throws IOException {
+        writer.write( "</table>" );
+        writer.write( "</body>" );
+        writer.write( "</html>" );
+    }
+
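+    // Writes the HTML head (title and CSS), the page heading, a small table with the run parameters, and the
+    // header row of the result table.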
+    private static void writeHtmlHeader( final Writer w,
+                                         final String desc,
+                                         final String ontologizer_outdir,
+                                         final String domain_gain_loss_file ) throws IOException {
+        w.write( "<head>" );
+        w.write( "<title>" );
+        w.write( desc );
+        w.write( "</title>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<style>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "a:visited { color : #F87217; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "a:link { color : #F87217; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "a:hover { color : #FFFFFF; background-color : #00FF00; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "a:hover { color : #FFFFFF; background-color : #00FF00; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "a.new_type:visited { font-size: 7pt; color : #808080; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "a.new_type:link { font-size: 7pt; color : #505050; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w
+                .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w
+                .write( "a.new_type:hover { font-size: 7pt; color : #000000; background-color : #FFFF00; text-decoration : none; }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w
+                .write( "th { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 10pt; font-weight: bold }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "h1 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "h2 { color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
+        w
+                .write( "h3 { margin-top: 12px;  margin-bottom: 0px; color : #000000; font-family: Verdana, Arial, Helvetica; font-size: 12pt; font-weight: bold }" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "</style>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "</head>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<body>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<h2>" );
+        w.write( "meta ontologizer" );
+        w.write( "</h2>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<h2>" );
+        w.write( desc );
+        w.write( "</h2>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<table>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<tr><th>" );
+        w.write( "ontolgizer output directory analysed:" );
+        w.write( "</th><td>" );
+        w.write( ontologizer_outdir );
+        w.write( "</td></tr>" );
+        if ( !ForesterUtil.isEmpty( domain_gain_loss_file ) ) {
+            w.write( ForesterUtil.LINE_SEPARATOR );
+            w.write( "<tr><th>" );
+            w.write( "domain gain or loss file:" );
+            w.write( "</th><td>" );
+            w.write( domain_gain_loss_file );
+            w.write( "</td></tr>" );
+        }
+        w.write( "</table>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<table>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( "<tr>" );
+        w.write( "<th>" );
+        w.write( "GO term name" );
+        w.write( "</th><th>" );
+        w.write( "GO id" );
+        w.write( "</th><th>" );
+        w.write( "P adjusted" );
+        w.write( "</th><th>" );
+        w.write( "P" );
+        w.write( "</th><th>" );
+        w.write( "Pop total" );
+        w.write( "</th><th>" );
+        w.write( "Pop term" );
+        w.write( "</th><th>" );
+        w.write( "Study total" );
+        w.write( "</th><th>" );
+        w.write( "Study term" );
+        w.write( "</th><th>" );
+        w.write( "Domains" );
+        w.write( "</th><th>" );
+        w.write( "trivial?" );
+        w.write( "</th>" );
+        w.write( "</tr>" );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
+    private static void writeHtmlSpecies( final Writer writer, final String species ) throws IOException {
+        writer.write( "<tr>" );
+        writer.write( "<td><h3>" );
+        writer.write( species );
+        SurfacingUtil.writeTaxonomyLinks( writer, species );
+        writer.write( "</h3></td>" );
+        writer.write( "</tr>" );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
+    private static void writeLabelsToTabWriter( final Writer writer ) throws IOException {
+        writer.write( "#species" );
+        writer.write( "\t" );
+        writer.write( "GO name" );
+        writer.write( "\t" );
+        writer.write( "GO id" );
+        writer.write( "\t" );
+        writer.write( "P adjusted" );
+        writer.write( "\t" );
+        writer.write( "P" );
+        writer.write( "\t" );
+        writer.write( "Pop total" );
+        writer.write( "\t" );
+        writer.write( "Pop term" );
+        writer.write( "\t" );
+        writer.write( "Study total" );
+        writer.write( "\t" );
+        writer.write( "Study term" );
+        writer.write( "\t" );
+        writer.write( "is trivial" );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
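+    // Writes one result row to the HTML output; P and adjusted P values are colored on a gradient from MIN_COLOR
+    // to MAX_COLOR relative to p_adjusted_upper_limit.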
+    private static void writeValuesToHtmlWriter( final OntologizerResult ontologizer_result,
+                                                 final GoTerm go_term,
+                                                 final Writer writer,
+                                                 final double p_adjusted_upper_limit,
+                                                 final String species,
+                                                 final Map<GoId, GoTerm> go_id_to_terms,
+                                                 final SortedSet<DomainId> domains_per_species,
+                                                 final List<PfamToGoMapping> pfam_to_go,
+                                                 final Set<DomainId> domain_ids_with_go_annot ) throws IOException {
+        final Color p_adj_color = ForesterUtil.calcColor( ontologizer_result.getPAdjusted(),
+                                                          0,
+                                                          p_adjusted_upper_limit,
+                                                          MIN_COLOR,
+                                                          MAX_COLOR );
+        final Color p_color = ForesterUtil.calcColor( ontologizer_result.getP(),
+                                                      0,
+                                                      p_adjusted_upper_limit,
+                                                      MIN_COLOR,
+                                                      MAX_COLOR );
+        writer.write( "<tr>" );
+        writer.write( "<td>" );
+        writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_adj_color ) + "\">" );
+        writer.write( go_term.getName() );
+        writer.write( "</font>" );
+        writer.write( "</td><td>" );
+        writer.write( "<a href=\"" + SurfacingConstants.GO_LINK + ontologizer_result.getGoId().getId()
+                + "\" target=\"amigo_window\">" + ontologizer_result.getGoId().getId() + "</a>" );
+        writer.write( "</td><td>" );
+        writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_adj_color ) + "\">" );
+        writer.write( FORMATER.format( ontologizer_result.getPAdjusted() ) );
+        writer.write( "</font>" );
+        writer.write( "</td><td>" );
+        writer.write( "<font color=\"#" + ForesterUtil.colorToHex( p_color ) + "\">" );
+        writer.write( FORMATER.format( ontologizer_result.getP() ) );
+        writer.write( "</font>" );
+        writer.write( "</td><td>" );
+        writer.write( String.valueOf( ontologizer_result.getPopTotal() ) );
+        writer.write( "</td><td>" );
+        writer.write( String.valueOf( ontologizer_result.getPopTerm() ) );
+        writer.write( "</td><td>" );
+        writer.write( String.valueOf( ontologizer_result.getStudyTotal() ) );
+        writer.write( "</td><td>" );
+        writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) );
+        writer.write( "</td><td>" );
+        if ( domains_per_species != null ) {
+            final StringBuilder sb = obtainDomainsForGoId( pfam_to_go, domains_per_species, go_id_to_terms, go_term
+                    .getGoId(), domain_ids_with_go_annot );
+            writer.write( sb.toString() );
+        }
+        else {
+            writer.write( " " );
+        }
+        writer.write( "</td><td>" );
+        if ( ontologizer_result.isTrivial() ) {
+            writer.write( "trivial" );
+        }
+        else {
+            writer.write( " " );
+        }
+        writer.write( "</td>" );
+        writer.write( "</tr>" );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
+    private static void writeValuesToTabWriter( final String species,
+                                                final OntologizerResult ontologizer_result,
+                                                final GoTerm go_term,
+                                                final Writer writer ) throws IOException {
+        writer.write( species );
+        writer.write( "\t" );
+        writer.write( go_term.getName() );
+        writer.write( "\t" );
+        writer.write( ontologizer_result.getGoId().getId() );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.getPAdjusted() ) );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.getP() ) );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.getPopTotal() ) );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.getPopTerm() ) );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.getStudyTotal() ) );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.getStudyTerm() ) );
+        writer.write( "\t" );
+        writer.write( String.valueOf( ontologizer_result.isTrivial() ) );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+}
diff --git a/forester/java/src/org/forester/go/etc/OntologizerResult.java b/forester/java/src/org/forester/go/etc/OntologizerResult.java
new file mode 100644 (file)
index 0000000..d7e2be5
--- /dev/null
@@ -0,0 +1,205 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.go.etc;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.go.GoId;
+import org.forester.util.ForesterUtil;
+
+/*
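+ * Represents one result row of an Ontologizer table output file. Three formats are supported and detected by
+ * their number of columns: Topology-Elim, Term-for-Term, and Parent-Child-Union (see TYPE).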
+ * 
+ * Note: this class has a natural ordering that is inconsistent with equals.
+ */
+public class OntologizerResult implements Comparable<OntologizerResult> {
+
+    final private GoId    _goid;
+    final private int     _pop_total;
+    final private int     _pop_term;
+    final private int     _study_total;
+    final private int     _study_term;
+    final private int     _pop_family;
+    final private int     _study_family;
+    final private int     _nparents;
+    final private boolean _is_trivial;
+    final private double  _p;
+    final private double  _p_adjusted;
+    final private double  _p_min;
+    final private TYPE    _type;
+
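+    // Parses one tab-delimited Ontologizer result line; the format (and thus the TYPE) is determined by the
+    // number of tokens (9, 11, or 12).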
+    private OntologizerResult( final String s ) {
+        if ( ForesterUtil.isEmpty( s ) ) {
+            throw new IllegalArgumentException( "result string is null or empty" );
+        }
+        final String[] tokens = s.split( "\t" );
+        if ( ( tokens.length != 9 ) && ( tokens.length != 11 ) && ( tokens.length != 12 ) ) {
+            throw new IllegalArgumentException( "result string [" + s + "] has unexpected format" );
+        }
+        _goid = new GoId( tokens[ 0 ] );
+        _pop_total = Integer.parseInt( tokens[ 1 ] );
+        _pop_term = Integer.parseInt( tokens[ 2 ] );
+        _study_total = Integer.parseInt( tokens[ 3 ] );
+        _study_term = Integer.parseInt( tokens[ 4 ] );
+        if ( tokens.length == 11 ) {
+            // Topology Elim
+            // ID Pop.total Pop.term Study.total Study.term Pop.family Study.family is.trivial p p.adjusted p.min
+            _type = TYPE.TOPOLOGY;
+            _pop_family = Integer.parseInt( tokens[ 5 ] );
+            _study_family = Integer.parseInt( tokens[ 6 ] );
+            _is_trivial = Boolean.parseBoolean( tokens[ 7 ] );
+            _p = Double.parseDouble( tokens[ 8 ] );
+            _p_adjusted = Double.parseDouble( tokens[ 9 ] );
+            _p_min = Double.parseDouble( tokens[ 10 ] );
+            _nparents = -1;
+        }
+        else if ( tokens.length == 9 ) {
+            // Term for Term
+            // ID Pop.total Pop.term Study.total Study.term p p.adjusted p.min name
+            _type = TYPE.TERM_FOR_TERM;
+            _pop_family = -1;
+            _study_family = -1;
+            _nparents = -1;
+            _is_trivial = false;
+            _p = Double.parseDouble( tokens[ 5 ] );
+            _p_adjusted = Double.parseDouble( tokens[ 6 ] );
+            _p_min = Double.parseDouble( tokens[ 7 ] );
+        }
+        else {
+            // Parent Child Union
+            // ID Pop.total Pop.term Study.total Study.term Pop.family Study.family nparents is.trivial p p.adjusted p.min
+            _type = TYPE.PARENT_CHILD;
+            _pop_family = Integer.parseInt( tokens[ 5 ] );
+            _study_family = Integer.parseInt( tokens[ 6 ] );
+            _nparents = Integer.parseInt( tokens[ 7 ] );
+            _is_trivial = Boolean.parseBoolean( tokens[ 8 ] );
+            _p = Double.parseDouble( tokens[ 9 ] );
+            _p_adjusted = Double.parseDouble( tokens[ 10 ] );
+            _p_min = Double.parseDouble( tokens[ 11 ] );
+        }
+    }
+
+    @Override
+    public int compareTo( final OntologizerResult o ) {
+        if ( this == o ) {
+            return 0;
+        }
+        else if ( getPAdjusted() < o.getPAdjusted() ) {
+            return -1;
+        }
+        else if ( getPAdjusted() > o.getPAdjusted() ) {
+            return 1;
+        }
+        else {
+            return 0;
+        }
+    }
+
+    public GoId getGoId() {
+        return _goid;
+    }
+
+    public int getNParents() {
+        return _nparents;
+    }
+
+    public double getP() {
+        return _p;
+    }
+
+    public double getPAdjusted() {
+        return _p_adjusted;
+    }
+
+    public double getPMin() {
+        return _p_min;
+    }
+
+    public int getPopFamily() {
+        return _pop_family;
+    }
+
+    public int getPopTerm() {
+        return _pop_term;
+    }
+
+    public int getPopTotal() {
+        return _pop_total;
+    }
+
+    public int getStudyFamily() {
+        return _study_family;
+    }
+
+    public int getStudyTerm() {
+        return _study_term;
+    }
+
+    public int getStudyTotal() {
+        return _study_total;
+    }
+
+    public TYPE getType() {
+        return _type;
+    }
+
+    public boolean isTrivial() {
+        return _is_trivial;
+    }
+
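+    // Parses all lines starting with "GO:" of an Ontologizer table output file into OntologizerResult objects.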
+    public static List<OntologizerResult> parse( final File input ) throws IOException {
+        final String error = ForesterUtil.isReadableFile( input );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            throw new IOException( error );
+        }
+        final BufferedReader br = new BufferedReader( new FileReader( input ) );
+        String line;
+        final List<OntologizerResult> results = new ArrayList<OntologizerResult>();
+        int line_number = 0;
+        try {
+            while ( ( line = br.readLine() ) != null ) {
+                line_number++;
+                line = line.trim();
+                if ( line.startsWith( "GO:" ) ) {
+                    results.add( new OntologizerResult( line ) );
+                }
+            }
+        }
+        catch ( final Exception e ) {
+            throw new IOException( "parsing problem [at line " + line_number + "] in [" + input + "]: "
+                    + e.getMessage() );
+        }
+        return results;
+    }
+
+    public static enum TYPE {
+        TOPOLOGY, TERM_FOR_TERM, PARENT_CHILD;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java
new file mode 100644 (file)
index 0000000..4c6845c
--- /dev/null
@@ -0,0 +1,210 @@
+// $Id:
+//
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.msa.BasicMsa;
+import org.forester.msa.Msa;
+import org.forester.msa.MsaFormatException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.Sequence;
+import org.forester.util.ForesterUtil;
+
+public class FastaParser {
+
+    private static final Pattern NAME_REGEX      = Pattern.compile( "^\\s*>\\s*(.+)" );
+    private static final Pattern SEQ_REGEX       = Pattern.compile( "^\\s*(.+)" );
+    private static final Pattern ANYTHING_REGEX  = Pattern.compile( "[\\d\\s]+" );
+    //>gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio]
+    private static final Pattern FASTA_DESC_LINE = Pattern
+                                                         .compile( ">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]" );
+
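+    // Manual test of FASTA_DESC_LINE against a typical NCBI style description line.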
+    public static void main( final String[] args ) {
+        final String a = ">gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio]";
+        final Matcher name_m = FASTA_DESC_LINE.matcher( a );
+        if ( name_m.lookingAt() ) {
+            System.out.println();
+            System.out.println( name_m.group( 1 ) );
+            System.out.println( name_m.group( 2 ) );
+            System.out.println( name_m.group( 3 ) );
+            System.out.println( name_m.group( 4 ) );
+        }
+        else {
+            System.out.println( "Does not match." );
+        }
+    }
+
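+    // Returns true if the input looks like FASTA, i.e. a ">" name line is encountered before any other
+    // sequence-like line.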
+    static public boolean isLikelyFasta( final InputStream is ) throws IOException {
+        final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) );
+        String line = null;
+        while ( ( line = reader.readLine() ) != null ) {
+            final boolean is_name_line = NAME_REGEX.matcher( line ).lookingAt();
+            if ( canIgnore( line, true, false ) ) {
+                continue;
+            }
+            else if ( is_name_line ) {
+                reader.close();
+                return true;
+            }
+            else if ( SEQ_REGEX.matcher( line ).lookingAt() ) {
+                reader.close();
+                return false;
+            }
+        }
+        reader.close();
+        return false;
+    }
+
+    static public Msa parseMsa( final InputStream is ) throws IOException {
+        return BasicMsa.createInstance( parse( is ) );
+    }
+
+    static public Msa parseMsa( final String s ) throws IOException {
+        return parseMsa( s.getBytes() );
+    }
+
+    static public Msa parseMsa( final byte[] bytes ) throws IOException {
+        return parseMsa( new ByteArrayInputStream( bytes ) );
+    }
+
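+    // Parses FASTA formatted input into a list of amino acid sequences; a ">" line starts a new entry and all
+    // whitespace within sequence lines is removed.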
+    static public List<Sequence> parse( final InputStream is ) throws IOException {
+        final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) );
+        String line = null;
+        int line_counter = 0;
+        boolean saw_first_seq = false;
+        StringBuilder current_seq = null;
+        StringBuilder name = null;
+        final List<StringBuilder[]> temp_msa = new ArrayList<StringBuilder[]>();
+        while ( ( line = reader.readLine() ) != null ) {
+            ++line_counter;
+            final Matcher name_m = NAME_REGEX.matcher( line );
+            final boolean is_name_line = name_m.lookingAt();
+            if ( canIgnore( line, saw_first_seq, is_name_line ) ) {
+                continue;
+            }
+            final Matcher seq_m = SEQ_REGEX.matcher( line );
+            if ( is_name_line ) {
+                saw_first_seq = true;
+                addSeq( name, current_seq, temp_msa );
+                name = new StringBuilder( name_m.group( 1 ).trim() );
+                current_seq = new StringBuilder();
+            }
+            else if ( seq_m.lookingAt() ) {
+                if ( name.length() < 1 ) {
+                    reader.close();
+                    throw new MsaFormatException( "illegally formatted fasta msa (line: " + line_counter + "):\n\""
+                            + trim( line ) + "\"" );
+                }
+                current_seq.append( seq_m.group( 1 ).replaceAll( "\\s+", "" ) );
+            }
+            else {
+                reader.close();
+                throw new MsaFormatException( "illegally formatted fasta msa (line: " + line_counter + "):\n\""
+                        + trim( line ) + "\"" );
+            }
+        }
+        addSeq( name, current_seq, temp_msa );
+        reader.close();
+        final List<Sequence> seqs = new ArrayList<Sequence>();
+        for( int i = 0; i < temp_msa.size(); ++i ) {
+            seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(), temp_msa.get( i )[ 1 ]
+                    .toString() ) );
+        }
+        return seqs;
+    }
+
+    static private boolean canIgnore( final String line, final boolean saw_first_seq, final boolean is_name_line ) {
+        if ( ( line.length() < 1 ) || ANYTHING_REGEX.matcher( line ).matches() ) {
+            return true;
+        }
+        if ( !saw_first_seq && !is_name_line ) {
+            return true;
+        }
+        return false;
+    }
+
+    private static void addSeq( final StringBuilder name, final StringBuilder seq, final List<StringBuilder[]> temp_msa ) {
+        if ( ( name != null ) && ( seq != null ) && ( name.length() > 0 ) && ( seq.length() > 0 ) ) {
+            final StringBuilder[] ary = new StringBuilder[ 2 ];
+            ary[ 0 ] = name;
+            ary[ 1 ] = seq;
+            temp_msa.add( ary );
+        }
+    }
+
+    private static String trim( final String line ) {
+        if ( line.length() > 100 ) {
+            return line.substring( 0, 100 ) + " ...";
+        }
+        return line;
+    }
+
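+    // For each external node whose name matches a typical NCBI style FASTA description line, sets the sequence
+    // accession, the sequence name, and the taxonomy scientific name accordingly.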
+    public static void extractFastaInformation( final Phylogeny phy ) {
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                final Matcher name_m = FASTA_DESC_LINE.matcher( node.getName() );
+                if ( name_m.lookingAt() ) {
+                    System.out.println();
+                    // System.out.println( name_m.group( 1 ) );
+                    // System.out.println( name_m.group( 2 ) );
+                    // System.out.println( name_m.group( 3 ) );
+                    // System.out.println( name_m.group( 4 ) );
+                    final String acc_source = name_m.group( 1 );
+                    final String acc = name_m.group( 2 );
+                    final String seq_name = name_m.group( 3 );
+                    final String tax_sn = name_m.group( 4 );
+                    if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+                    }
+                    if ( !ForesterUtil.isEmpty( seq_name ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setName( seq_name );
+                    }
+                    if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+                        ForesterUtil.ensurePresenceOfTaxonomy( node );
+                        node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/GeneralMsaParser.java b/forester/java/src/org/forester/io/parsers/GeneralMsaParser.java
new file mode 100644 (file)
index 0000000..4b7e920
--- /dev/null
@@ -0,0 +1,186 @@
+// $Id:
+//
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.msa.BasicMsa;
+import org.forester.msa.Msa;
+import org.forester.msa.MsaFormatException;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.Sequence;
+
+public final class GeneralMsaParser {
+
+    private static final Pattern NAME_SEQ_PATTERN          = Pattern.compile( "(\\S+)\\s+(\\S+)\\s*" );
+    private static final Pattern INDENTED_SEQ_PATTERN      = Pattern.compile( "\\s+(\\S+)\\s*" );
+    private static final Pattern NON_INDENTED_SEQ_PATTERN  = Pattern.compile( "(\\S+).*" );
+    private static final Pattern CLUSTAL_REGEX             = Pattern.compile( "^CLUSTAL\\s" );
+    private static final Pattern MUSCLE_REGEX              = Pattern.compile( "^MUSCLE\\s\\(" );
+    private static final Pattern PROBCONS_REGEX            = Pattern.compile( "^PROBCONS\\s" );
+    private static final Pattern ANYTHING_REGEX            = Pattern.compile( "[\\d\\s]+" );
+    private static final Pattern SELEX_SPECIAL_LINES_REGEX = Pattern.compile( "\\s+[*\\.:\\s]+" );
+    private static final Pattern SPECIAL_LINES_REGEX       = Pattern.compile( "^\\s*(#|%|//|!!)" );
+    private static final Pattern ERROR_REGEX               = Pattern.compile( "\\S+\\s+\\S+\\s+\\S+" );
+
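+    // A line can be ignored if it is empty, consists only of digits and
+    // whitespace (e.g. Clustal-style column numbering), is a conservation line
+    // made up of '*', '.' and ':' characters, or starts with '#', '%', '//' or
+    // '!!'. (The named line types are illustrative interpretations of the
+    // patterns above.)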
+    static private boolean canIgnore( final String line ) {
+        if ( ( line.length() < 1 ) || ANYTHING_REGEX.matcher( line ).matches() ) {
+            return true;
+        }
+        return ( SELEX_SPECIAL_LINES_REGEX.matcher( line ).matches() || SPECIAL_LINES_REGEX.matcher( line ).lookingAt() );
+    }
+
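+    // Recognizes alignment-program header lines such as "CLUSTAL W (1.83) ...",
+    // "MUSCLE (3.8) ..." or "PROBCONS version 1.12 ..." (illustrative examples).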
+    static private boolean isProgramNameLine( final String line ) {
+        return ( PROBCONS_REGEX.matcher( line ).lookingAt() || CLUSTAL_REGEX.matcher( line ).lookingAt() || MUSCLE_REGEX
+                .matcher( line ).lookingAt() );
+    }
+
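+    // A minimal usage sketch (the file name and the stream handling are
+    // illustrative assumptions, not part of this class):
+    //
+    //   final InputStream is = new FileInputStream( "alignment.aln" );
+    //   final Msa msa = GeneralMsaParser.parse( is );
+    //   is.close();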
+    static public Msa parse( final InputStream is ) throws IOException {
+        int block = -1;
+        int current_seq_index_per_block = -1;
+        String current_name = null;
+        boolean saw_ignorable = true;
+        boolean is_first = true;
+        final Map<String, StringBuilder> temp_msa = new HashMap<String, StringBuilder>();
+        final List<String> names_in_order = new ArrayList<String>();
+        final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) );
+        String line = null;
+        int line_counter = 0;
+        while ( ( line = reader.readLine() ) != null ) {
+            ++line_counter;
+            if ( canIgnore( line ) ) {
+                saw_ignorable = true;
+            }
+            else if ( !( is_first && isProgramNameLine( line ) ) ) {
+                if ( ERROR_REGEX.matcher( line ).lookingAt() ) {
+                    throw new MsaFormatException( "unrecognized msa format (line: " + line_counter + "):\n\""
+                            + trim( line ) + "\"" );
+                }
+                final Matcher name_seq_m = NAME_SEQ_PATTERN.matcher( line );
+                Matcher ind_seq_m = null;
+                Matcher non_ind_seq_m = null;
+                boolean ind_seq_m_matches = false;
+                boolean non_ind_seq_m_matches = false;
+                final boolean name_seq_m_matches = name_seq_m.matches();
+                if ( !name_seq_m_matches ) {
+                    ind_seq_m = INDENTED_SEQ_PATTERN.matcher( line );
+                    ind_seq_m_matches = ind_seq_m.matches();
+                    if ( !ind_seq_m_matches ) {
+                        non_ind_seq_m = NON_INDENTED_SEQ_PATTERN.matcher( line );
+                        non_ind_seq_m_matches = non_ind_seq_m.lookingAt();
+                    }
+                }
+                if ( name_seq_m_matches || ind_seq_m_matches || non_ind_seq_m_matches ) {
+                    if ( saw_ignorable ) {
+                        ++block;
+                        current_seq_index_per_block = -1;
+                        saw_ignorable = false;
+                    }
+                    ++current_seq_index_per_block;
+                    if ( name_seq_m_matches ) {
+                        final String name = name_seq_m.group( 1 );
+                        final String seq = name_seq_m.group( 2 );
+                        if ( temp_msa.containsKey( name ) ) {
+                            temp_msa.get( name ).append( seq );
+                        }
+                        else {
+                            temp_msa.put( name, new StringBuilder( seq ) );
+                            names_in_order.add( name );
+                        }
+                        current_name = name;
+                    }
+                    else if ( ind_seq_m_matches ) {
+                        if ( temp_msa.containsKey( current_name ) ) {
+                            temp_msa.get( current_name ).append( ind_seq_m.group( 1 ) );
+                        }
+                        else {
+                            throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\""
+                                    + trim( line ) + "\"" );
+                        }
+                    }
+                    else if ( non_ind_seq_m_matches ) {
+                        if ( block == 0 ) {
+                            throw new MsaFormatException( "illegal msa format: first block cannot contain un-named sequence (line: "
+                                    + line_counter + "):\n\"" + trim( line ) + "\"" );
+                        }
+                        else {
+                            String name = "";
+                            try {
+                                name = names_in_order.get( current_seq_index_per_block );
+                            }
+                            catch ( final IndexOutOfBoundsException e ) {
+                                throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\""
+                                        + trim( line ) + "\"" );
+                            }
+                            if ( temp_msa.containsKey( name ) ) {
+                                temp_msa.get( name ).append( non_ind_seq_m.group( 1 ) );
+                            }
+                            else {
+                                throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\""
+                                        + trim( line ) + "\"" );
+                            }
+                        }
+                        current_name = null;
+                    }
+                }
+                else {
+                    throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\"" + trim( line )
+                            + "\"" );
+                }
+                if ( is_first ) {
+                    is_first = false;
+                }
+            }
+        } // while ( ( line = reader.readLine() ) != null )
+        final List<Sequence> seqs = new ArrayList<Sequence>();
+        for( int i = 0; i < names_in_order.size(); ++i ) {
+            seqs.add( BasicSequence.createAaSequence( names_in_order.get( i ), temp_msa.get( names_in_order.get( i ) )
+                    .toString() ) );
+        }
+        final Msa msa = BasicMsa.createInstance( seqs );
+        return msa;
+    }
+
+    private static String trim( final String line ) {
+        if ( line.length() > 100 ) {
+            return line.substring( 0, 100 ) + " ...";
+        }
+        return line;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
new file mode 100644 (file)
index 0000000..42f94ee
--- /dev/null
@@ -0,0 +1,689 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.forester.surfacing.BasicDomain;
+import org.forester.surfacing.BasicProtein;
+import org.forester.surfacing.Domain;
+import org.forester.surfacing.DomainId;
+import org.forester.surfacing.Protein;
+import org.forester.surfacing.SurfacingUtil;
+import org.forester.util.ForesterUtil;
+
+public final class HmmPfamOutputParser {
+
+    private static final String     RETRO                       = "RETRO";
+    private static final String     PHAGE                       = "PHAGE";
+    private static final String     VIR                         = "VIR";
+    private static final String     TRANSPOS                    = "TRANSPOS";
+    private static final String     RV                          = "RV";
+    private static final String     GAG                         = "GAG_";
+    private static final String     HCV                         = "HCV_";                                                    // New. Added on Jun 11, after 1st submission.
+    private static final String     HERPES                      = "Herpes_";                                                 // New. Added on Jun 11, after 1st submission.
+    private static final int        E_VALUE_MAXIMUM_DEFAULT     = -1;
+    private static final ReturnType RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
+    private static final boolean    IGNORE_DUFS_DEFAULT         = false;
+    private static final int        MAX_ALLOWED_OVERLAP_DEFAULT = -1;
+    private final Set<DomainId>     _filter;
+    private final FilterType        _filter_type;
+    private final File              _input_file;
+    private final String            _species;
+    private final String            _model_type;
+    private double                  _e_value_maximum;
+    private Map<String, String>     _individual_domain_score_cutoffs;
+    private boolean                 _ignore_dufs;
+    private boolean                 _ignore_virus_like_ids;
+    private boolean                 _allow_non_unique_query;
+    private boolean                 _verbose;
+    private int                     _max_allowed_overlap;
+    private boolean                 _ignore_engulfed_domains;
+    private ReturnType              _return_type;
+    private int                     _proteins_encountered;
+    private int                     _proteins_ignored_due_to_filter;
+    private int                     _proteins_stored;
+    private int                     _domains_encountered;
+    private int                     _domains_ignored_due_to_duf;
+    private int                     _domains_ignored_due_to_overlap;
+    private int                     _domains_ignored_due_to_e_value;
+    private int                     _domains_ignored_due_to_individual_score_cutoff;
+    private int                     _domains_stored;
+    private SortedSet<DomainId>     _domains_stored_set;
+    private long                    _time;
+    private int                     _domains_ignored_due_to_negative_domain_filter;
+    private Map<String, Integer>    _domains_ignored_due_to_negative_domain_filter_counts_map;
+    private int                     _domains_ignored_due_to_virus_like_id;
+    private Map<String, Integer>    _domains_ignored_due_to_virus_like_id_counts_map;
+
+    public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
+        _input_file = input_file;
+        _species = species;
+        _model_type = model_type;
+        _filter = null;
+        _filter_type = FilterType.NONE;
+        init();
+    }
+
+    public HmmPfamOutputParser( final File input_file,
+                                final String species,
+                                final String model_type,
+                                final Set<DomainId> filter,
+                                final FilterType filter_type ) {
+        _input_file = input_file;
+        _species = species;
+        _model_type = model_type;
+        _filter = filter;
+        _filter_type = filter_type;
+        init();
+    }
+
+    private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
+        final List<Domain> l = current_protein.getProteinDomains();
+        for( final Domain d : l ) {
+            getDomainsStoredSet().add( d.getDomainId() );
+        }
+        proteins.add( current_protein );
+        ++_proteins_stored;
+    }
+
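+    // Applies the protein-level filter before storing: with a POSITIVE_PROTEIN
+    // filter the protein is kept only if it contains at least one domain id from
+    // the filter set, with a NEGATIVE_PROTEIN filter only if it contains none;
+    // otherwise the protein is stored unconditionally.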
+    private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
+        if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
+            final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
+            for( final Domain d : current_protein.getProteinDomains() ) {
+                domain_ids_in_protein.add( d.getDomainId() );
+            }
+            domain_ids_in_protein.retainAll( getFilter() );
+            if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
+                if ( domain_ids_in_protein.size() > 0 ) {
+                    actuallyAddProtein( proteins, current_protein );
+                }
+                else {
+                    ++_proteins_ignored_due_to_filter;
+                }
+            }
+            else {
+                if ( domain_ids_in_protein.size() < 1 ) {
+                    actuallyAddProtein( proteins, current_protein );
+                }
+                else {
+                    ++_proteins_ignored_due_to_filter;
+                }
+            }
+        }
+        else {
+            actuallyAddProtein( proteins, current_protein );
+        }
+    }
+
+    public int getDomainsEncountered() {
+        return _domains_encountered;
+    }
+
+    public int getDomainsIgnoredDueToDuf() {
+        return _domains_ignored_due_to_duf;
+    }
+
+    public int getDomainsIgnoredDueToEval() {
+        return _domains_ignored_due_to_e_value;
+    }
+
+    public int getDomainsIgnoredDueToIndividualScoreCutoff() {
+        return _domains_ignored_due_to_individual_score_cutoff;
+    }
+
+    public int getDomainsIgnoredDueToNegativeDomainFilter() {
+        return _domains_ignored_due_to_negative_domain_filter;
+    }
+
+    public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
+        return _domains_ignored_due_to_negative_domain_filter_counts_map;
+    }
+
+    public int getDomainsIgnoredDueToOverlap() {
+        return _domains_ignored_due_to_overlap;
+    }
+
+    public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
+        return _domains_ignored_due_to_virus_like_id_counts_map;
+    }
+
+    public int getDomainsIgnoredDueToVirusLikeIds() {
+        return _domains_ignored_due_to_virus_like_id;
+    }
+
+    public int getDomainsStored() {
+        return _domains_stored;
+    }
+
+    public SortedSet<DomainId> getDomainsStoredSet() {
+        return _domains_stored_set;
+    }
+
+    private double getEValueMaximum() {
+        return _e_value_maximum;
+    }
+
+    private Set<DomainId> getFilter() {
+        return _filter;
+    }
+
+    private FilterType getFilterType() {
+        return _filter_type;
+    }
+
+    private Map<String, String> getIndividualDomainScoreCutoffs() {
+        return _individual_domain_score_cutoffs;
+    }
+
+    private File getInputFile() {
+        return _input_file;
+    }
+
+    private int getMaxAllowedOverlap() {
+        return _max_allowed_overlap;
+    }
+
+    private String getModelType() {
+        return _model_type;
+    }
+
+    public int getProteinsEncountered() {
+        return _proteins_encountered;
+    }
+
+    public int getProteinsIgnoredDueToFilter() {
+        return _proteins_ignored_due_to_filter;
+    }
+
+    public int getProteinsStored() {
+        return _proteins_stored;
+    }
+
+    private ReturnType getReturnType() {
+        return _return_type;
+    }
+
+    private String getSpecies() {
+        return _species;
+    }
+
+    public long getTime() {
+        return _time;
+    }
+
+    private void init() {
+        _e_value_maximum = HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT;
+        setIgnoreDufs( HmmPfamOutputParser.IGNORE_DUFS_DEFAULT );
+        setReturnType( HmmPfamOutputParser.RETURN_TYPE_DEFAULT );
+        _max_allowed_overlap = HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT;
+        setIndividualDomainScoreCutoffs( null );
+        setIgnoreEngulfedDomains( false );
+        setIgnoreVirusLikeIds( false );
+        setAllowNonUniqueQuery( false );
+        setVerbose( false );
+        initCounts();
+    }
+
+    private void initCounts() {
+        setDomainsStoredSet( new TreeSet<DomainId>() );
+        setDomainsEncountered( 0 );
+        setProteinsEncountered( 0 );
+        setProteinsIgnoredDueToFilter( 0 );
+        setDomainsIgnoredDueToNegativeFilter( 0 );
+        setDomainsIgnoredDueToDuf( 0 );
+        setDomainsIgnoredDueToEval( 0 );
+        setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
+        setDomainsIgnoredDueToVirusLikeId( 0 );
+        setDomainsIgnoredDueToOverlap( 0 );
+        setDomainsStored( 0 );
+        setProteinsStored( 0 );
+        setTime( 0 );
+        setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
+        setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
+    }
+
+    private boolean isAllowNonUniqueQuery() {
+        return _allow_non_unique_query;
+    }
+
+    private boolean isIgnoreDufs() {
+        return _ignore_dufs;
+    }
+
+    private boolean isIgnoreEngulfedDomains() {
+        return _ignore_engulfed_domains;
+    }
+
+    private boolean isIgnoreVirusLikeIds() {
+        return _ignore_virus_like_ids;
+    }
+
+    private boolean isVerbose() {
+        return _verbose;
+    }
+
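+    // A minimal usage sketch (file name, species label and model type are
+    // illustrative assumptions):
+    //
+    //   final HmmPfamOutputParser parser =
+    //           new HmmPfamOutputParser( new File( "hmmpfam.out" ), "NEMVE", "pfam" );
+    //   parser.setEValueMaximum( 0.1 );
+    //   parser.setIgnoreDufs( true );
+    //   final List<Protein> proteins = parser.parse();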
+    public List<Protein> parse() throws IOException {
+        initCounts();
+        final Set<String> queries = new HashSet<String>();
+        final String error = ForesterUtil.isReadableFile( getInputFile() );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            throw new IOException( error );
+        }
+        final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
+        String line;
+        final List<Protein> proteins = new ArrayList<Protein>();
+        Protein current_protein = null;
+        int line_number = 0;
+        boolean saw_double_slash = true;
+        boolean can_parse_domains = false;
+        boolean saw_parsed_for_domains = false;
+        boolean saw_query_sequence = false;
+        boolean was_not_unique = false;
+        final long start_time = new Date().getTime();
+        while ( ( line = br.readLine() ) != null ) {
+            line_number++;
+            if ( line.length() < 1 ) {
+                continue;
+            }
+            else if ( line.startsWith( "Query sequence:" ) ) {
+                ++_proteins_encountered;
+                if ( !saw_double_slash ) {
+                    throw new IOException( "unexpected format [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                saw_double_slash = false;
+                saw_query_sequence = true;
+                was_not_unique = false;
+                final String query = line.substring( 16 ).trim();
+                if ( ForesterUtil.isEmpty( query ) ) {
+                    throw new IOException( "query sequence cannot be empty [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                if ( queries.contains( query ) ) {
+                    if ( !isAllowNonUniqueQuery() ) {
+                        throw new IOException( "query \"" + query + "\" is not unique [line " + line_number + "] in ["
+                                + getInputFile().getCanonicalPath() + "]" );
+                    }
+                    else if ( isVerbose() ) {
+                        ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
+                                + "\" is not unique [line " + line_number + "] in ["
+                                + getInputFile().getCanonicalPath() + "]" );
+                    }
+                }
+                else {
+                    queries.add( query );
+                }
+                if ( current_protein != null ) {
+                    throw new IOException( "unexpected format [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
+                    current_protein = new BasicProtein( query, getSpecies() );
+                }
+                else {
+                    throw new IllegalArgumentException( "unknown return type" );
+                }
+            }
+            else if ( line.startsWith( "Accession:" ) ) {
+                if ( !saw_query_sequence || ( current_protein == null ) ) {
+                    throw new IOException( "unexpected format [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                ( ( BasicProtein ) current_protein ).setAccession( line.substring( 11 ).trim() );
+            }
+            else if ( line.startsWith( "Description:" ) ) {
+                if ( !saw_query_sequence || ( current_protein == null ) ) {
+                    throw new IOException( "unexpected format [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                if ( was_not_unique ) {
+                    if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
+                        current_protein = new BasicProtein( current_protein.getProteinId() + " "
+                                + line.substring( 13 ).trim(), getSpecies() );
+                    }
+                }
+                else {
+                    ( ( BasicProtein ) current_protein ).setDescription( line.substring( 13 ).trim() );
+                }
+            }
+            else if ( line.startsWith( "Parsed for domains:" ) ) {
+                if ( !saw_query_sequence ) {
+                    throw new IOException( "unexpected format [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                saw_query_sequence = false;
+                saw_parsed_for_domains = true;
+            }
+            else if ( saw_parsed_for_domains && line.startsWith( "--------" ) ) {
+                can_parse_domains = true;
+                saw_parsed_for_domains = false;
+            }
+            else if ( line.startsWith( "Alignments of top-scoring domains:" ) ) {
+                if ( !can_parse_domains ) {
+                    throw new IOException( "unexpected format [line " + line_number + "] in ["
+                            + getInputFile().getCanonicalPath() + "]" );
+                }
+                can_parse_domains = false;
+            }
+            else if ( line.startsWith( "//" ) ) {
+                can_parse_domains = false;
+                saw_double_slash = true;
+                if ( current_protein.getProteinDomains().size() > 0 ) {
+                    if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
+                            || isIgnoreEngulfedDomains() ) {
+                        final int domains_count = current_protein.getNumberOfProteinDomains();
+                        current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+                                                                                  isIgnoreEngulfedDomains(),
+                                                                                  current_protein );
+                        final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
+                        _domains_stored -= domains_removed;
+                        _domains_ignored_due_to_overlap += domains_removed;
+                    }
+                    addProtein( proteins, current_protein );
+                }
+                current_protein = null;
+            }
+            else if ( can_parse_domains && ( line.indexOf( "[no hits above thresholds]" ) == -1 ) ) {
+                final String[] s = line.split( "\\s+" );
+                if ( s.length != 10 ) {
+                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
+                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                final String id = s[ 0 ];
+                final String domain_count_str = s[ 1 ];
+                final String from_str = s[ 2 ];
+                final String to_str = s[ 3 ];
+                final String query_match_str = s[ 4 ];
+                final String hmm_match_str = s[ 7 ];
+                final String score_str = s[ 8 ];
+                final String e_value_str = s[ 9 ];
+                int from = -1;
+                int to = -1;
+                double e_value = -1;
+                double score = -1;
+                boolean is_complete_hmm_match = false;
+                boolean is_complete_query_match = false;
+                try {
+                    from = Integer.valueOf( from_str ).intValue();
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
+                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                try {
+                    to = Integer.valueOf( to_str ).intValue();
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
+                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                try {
+                    score = Double.valueOf( score_str ).doubleValue();
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
+                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                try {
+                    e_value = Double.valueOf( e_value_str ).doubleValue();
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
+                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                if ( hmm_match_str.equals( "[]" ) ) {
+                    is_complete_hmm_match = true;
+                }
+                else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
+                        .equals( ".." ) ) ) {
+                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
+                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                if ( query_match_str.equals( ".." ) ) {
+                    is_complete_query_match = true;
+                }
+                else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
+                        .equals( "[]" ) ) ) {
+                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
+                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                final String[] ct = domain_count_str.split( "/" );
+                if ( ct.length != 2 ) {
+                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
+                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                final String number_str = ct[ 0 ];
+                final String total_str = ct[ 1 ];
+                int number = -1;
+                int total = -1;
+                try {
+                    number = Integer.valueOf( ( number_str ) ).intValue();
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
+                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                try {
+                    total = Integer.valueOf( ( total_str ) ).intValue();
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
+                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+                ++_domains_encountered;
+                boolean failed_cutoff = false;
+                if ( getIndividualDomainScoreCutoffs() != null ) {
+                    if ( getIndividualDomainScoreCutoffs().containsKey( id ) ) {
+                        final double cutoff = Double.parseDouble( getIndividualDomainScoreCutoffs().get( id ) );
+                        if ( score < cutoff ) {
+                            failed_cutoff = true;
+                        }
+                    }
+                    else {
+                        throw new IOException( "could not find a score cutoff value for domain id \"" + id
+                                + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                    }
+                }
+                final String uc_id = id.toUpperCase();
+                if ( failed_cutoff ) {
+                    ++_domains_ignored_due_to_individual_score_cutoff;
+                }
+                else if ( ( getEValueMaximum() != HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT )
+                        && ( e_value > getEValueMaximum() ) ) {
+                    ++_domains_ignored_due_to_e_value;
+                }
+                else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
+                    ++_domains_ignored_due_to_duf;
+                }
+                else if ( isIgnoreVirusLikeIds()
+                        && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
+                                || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
+                                || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) ) ) {
+                    ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
+                    ++_domains_ignored_due_to_virus_like_id;
+                }
+                else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
+                        && getFilter().contains( new DomainId( id ) ) ) {
+                    ++_domains_ignored_due_to_negative_domain_filter;
+                    ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
+                }
+                else {
+                    final BasicDomain pd = new BasicDomain( id,
+                                                            from,
+                                                            to,
+                                                            ( short ) number,
+                                                            ( short ) total,
+                                                            e_value,
+                                                            score );
+                    current_protein.addProteinDomain( pd );
+                    ++_domains_stored;
+                }
+            }
+        } // while ( ( line = br.readLine() ) != null )
+        setTime( new Date().getTime() - start_time );
+        if ( !saw_double_slash ) {
+            throw new IOException( "file ends unexpectedly [line " + line_number + "]" );
+        }
+        return proteins;
+    }
+
+    public void setAllowNonUniqueQuery( final boolean allow_non_unique_query ) {
+        _allow_non_unique_query = allow_non_unique_query;
+    }
+
+    private void setDomainsEncountered( final int domains_encountered ) {
+        _domains_encountered = domains_encountered;
+    }
+
+    private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
+        _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
+    }
+
+    public void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
+        _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
+    }
+
+    public void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
+        _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
+    }
+
+    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
+        _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
+    }
+
+    private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
+        _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
+    }
+
+    private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
+        _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
+    }
+
+    private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
+        _domains_ignored_due_to_virus_like_id = i;
+    }
+
+    private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
+        _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
+    }
+
+    private void setDomainsStored( final int domains_stored ) {
+        _domains_stored = domains_stored;
+    }
+
+    private void setDomainsStoredSet( final SortedSet<DomainId> domains_stored_set ) {
+        _domains_stored_set = domains_stored_set;
+    }
+
+    public void setEValueMaximum( final double e_value_maximum ) {
+        if ( e_value_maximum < 0.0 ) {
+            throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
+        }
+        _e_value_maximum = e_value_maximum;
+    }
+
+    public void setIgnoreDufs( final boolean ignore_dufs ) {
+        _ignore_dufs = ignore_dufs;
+    }
+
+    /**
+     * Whether to ignore domains which are completely engulfed by domains
+     * (individual ones or stretches of overlapping ones) with better support
+     * values.
+     *
+     * @param ignore_engulfed_domains
+     */
+    public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
+        _ignore_engulfed_domains = ignore_engulfed_domains;
+    }
+
+    public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
+        _ignore_virus_like_ids = ignore_virus_like_ids;
+    }
+
+    /**
+     * Sets the individual domain score cutoff values (for example, gathering
+     * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
+     * 
+     * @param individual_domain_score_cutoffs
+     */
+    public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {
+        _individual_domain_score_cutoffs = individual_domain_score_cutoffs;
+    }
+
+    public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
+        if ( max_allowed_overlap < 0 ) {
+            throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
+        }
+        _max_allowed_overlap = max_allowed_overlap;
+    }
+
+    private void setProteinsEncountered( final int proteins_encountered ) {
+        _proteins_encountered = proteins_encountered;
+    }
+
+    private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
+        _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
+    }
+
+    private void setProteinsStored( final int proteins_stored ) {
+        _proteins_stored = proteins_stored;
+    }
+
+    public void setReturnType( final ReturnType return_type ) {
+        _return_type = return_type;
+    }
+
+    private void setTime( final long time ) {
+        _time = time;
+    }
+
+    public void setVerbose( final boolean verbose ) {
+        _verbose = verbose;
+    }
+
+    public static enum FilterType {
+        NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
+    }
+
+    public static enum ReturnType {
+        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
new file mode 100644 (file)
index 0000000..a63ba50
--- /dev/null
@@ -0,0 +1,595 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.forester.surfacing.BasicDomain;
+import org.forester.surfacing.BasicProtein;
+import org.forester.surfacing.Domain;
+import org.forester.surfacing.DomainId;
+import org.forester.surfacing.Protein;
+import org.forester.surfacing.SurfacingUtil;
+import org.forester.util.ForesterUtil;
+
+public final class HmmscanPerDomainTableParser {
+
+    private static final String           RETRO                       = "RETRO";
+    private static final String           PHAGE                       = "PHAGE";
+    private static final String           VIR                         = "VIR";
+    private static final String           TRANSPOS                    = "TRANSPOS";
+    private static final String           RV                          = "RV";
+    private static final String           GAG                         = "GAG_";
+    private static final String           HCV                         = "HCV_";
+    private static final String           HERPES                      = "HERPES_";
+    private static final String           BACULO                      = "BACULO_";
+    private static final int              E_VALUE_MAXIMUM_DEFAULT     = -1;
+    private static final ReturnType       RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
+    private static final boolean          IGNORE_DUFS_DEFAULT         = false;
+    private static final int              MAX_ALLOWED_OVERLAP_DEFAULT = -1;
+    private final Set<DomainId>           _filter;
+    private final FilterType              _filter_type;
+    private final File                    _input_file;
+    private final String                  _species;
+    private double                        _e_value_maximum;
+    private Map<String, Double>           _individual_score_cutoffs;
+    private boolean                       _ignore_dufs;
+    private boolean                       _ignore_virus_like_ids;
+    private int                           _max_allowed_overlap;
+    private boolean                       _ignore_engulfed_domains;
+    private ReturnType                    _return_type;
+    private int                           _proteins_encountered;
+    private int                           _proteins_ignored_due_to_filter;
+    private int                           _proteins_stored;
+    private int                           _domains_encountered;
+    private int                           _domains_ignored_due_to_duf;
+    private int                           _domains_ignored_due_to_overlap;
+    private int                           _domains_ignored_due_to_e_value;
+    private int                           _domains_ignored_due_to_individual_score_cutoff;
+    private int                           _domains_stored;
+    private SortedSet<DomainId>           _domains_stored_set;
+    private long                          _time;
+    private int                           _domains_ignored_due_to_negative_domain_filter;
+    private Map<String, Integer>          _domains_ignored_due_to_negative_domain_filter_counts_map;
+    private int                           _domains_ignored_due_to_virus_like_id;
+    private Map<String, Integer>          _domains_ignored_due_to_virus_like_id_counts_map;
+    private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
+
+    public HmmscanPerDomainTableParser( final File input_file,
+                                        final String species,
+                                        final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
+        _input_file = input_file;
+        _species = species;
+        _filter = null;
+        _filter_type = FilterType.NONE;
+        _ind_cutoff = individual_cutoff_applies_to;
+        init();
+    }
+
+    public HmmscanPerDomainTableParser( final File input_file,
+                                        final String species,
+                                        final Set<DomainId> filter,
+                                        final FilterType filter_type,
+                                        final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
+        _input_file = input_file;
+        _species = species;
+        _filter = filter;
+        _filter_type = filter_type;
+        _ind_cutoff = individual_cutoff_applies_to;
+        init();
+    }
+
+    private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
+        final List<Domain> l = current_protein.getProteinDomains();
+        for( final Domain d : l ) {
+            getDomainsStoredSet().add( d.getDomainId() );
+        }
+        proteins.add( current_protein );
+        ++_proteins_stored;
+    }
+
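+    // Removes overlapping (and, if requested, engulfed) domains first, then
+    // applies the protein-level filter: with a POSITIVE_PROTEIN filter the
+    // protein is kept only if it contains at least one domain id from the
+    // filter set, with a NEGATIVE_PROTEIN filter only if it contains none.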
+    private void addProtein( final List<Protein> proteins, Protein current_protein ) {
+        if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
+                || isIgnoreEngulfedDomains() ) {
+            final int domains_count = current_protein.getNumberOfProteinDomains();
+            current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+                                                                      isIgnoreEngulfedDomains(),
+                                                                      current_protein );
+            final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
+            _domains_stored -= domains_removed;
+            _domains_ignored_due_to_overlap += domains_removed;
+        }
+        if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
+            final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
+            for( final Domain d : current_protein.getProteinDomains() ) {
+                domain_ids_in_protein.add( d.getDomainId() );
+            }
+            domain_ids_in_protein.retainAll( getFilter() );
+            if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
+                if ( domain_ids_in_protein.size() > 0 ) {
+                    actuallyAddProtein( proteins, current_protein );
+                }
+                else {
+                    ++_proteins_ignored_due_to_filter;
+                }
+            }
+            else {
+                if ( domain_ids_in_protein.size() < 1 ) {
+                    actuallyAddProtein( proteins, current_protein );
+                }
+                else {
+                    ++_proteins_ignored_due_to_filter;
+                }
+            }
+        }
+        else {
+            actuallyAddProtein( proteins, current_protein );
+        }
+    }
+
+    public int getDomainsEncountered() {
+        return _domains_encountered;
+    }
+
+    public int getDomainsIgnoredDueToDuf() {
+        return _domains_ignored_due_to_duf;
+    }
+
+    public int getDomainsIgnoredDueToEval() {
+        return _domains_ignored_due_to_e_value;
+    }
+
+    public int getDomainsIgnoredDueToIndividualScoreCutoff() {
+        return _domains_ignored_due_to_individual_score_cutoff;
+    }
+
+    public int getDomainsIgnoredDueToNegativeDomainFilter() {
+        return _domains_ignored_due_to_negative_domain_filter;
+    }
+
+    public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
+        return _domains_ignored_due_to_negative_domain_filter_counts_map;
+    }
+
+    public int getDomainsIgnoredDueToOverlap() {
+        return _domains_ignored_due_to_overlap;
+    }
+
+    public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
+        return _domains_ignored_due_to_virus_like_id_counts_map;
+    }
+
+    public int getDomainsIgnoredDueToVirusLikeIds() {
+        return _domains_ignored_due_to_virus_like_id;
+    }
+
+    public int getDomainsStored() {
+        return _domains_stored;
+    }
+
+    public SortedSet<DomainId> getDomainsStoredSet() {
+        return _domains_stored_set;
+    }
+
+    private double getEValueMaximum() {
+        return _e_value_maximum;
+    }
+
+    private Set<DomainId> getFilter() {
+        return _filter;
+    }
+
+    private FilterType getFilterType() {
+        return _filter_type;
+    }
+
+    public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
+        return _ind_cutoff;
+    }
+
+    private Map<String, Double> getIndividualScoreCutoffs() {
+        return _individual_score_cutoffs;
+    }
+
+    private File getInputFile() {
+        return _input_file;
+    }
+
+    private int getMaxAllowedOverlap() {
+        return _max_allowed_overlap;
+    }
+
+    public int getProteinsEncountered() {
+        return _proteins_encountered;
+    }
+
+    public int getProteinsIgnoredDueToFilter() {
+        return _proteins_ignored_due_to_filter;
+    }
+
+    public int getProteinsStored() {
+        return _proteins_stored;
+    }
+
+    private ReturnType getReturnType() {
+        return _return_type;
+    }
+
+    private String getSpecies() {
+        return _species;
+    }
+
+    public long getTime() {
+        return _time;
+    }
+
+    private void init() {
+        _e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
+        setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT );
+        setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT );
+        _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT;
+        setIndividualScoreCutoffs( null );
+        setIgnoreEngulfedDomains( false );
+        setIgnoreVirusLikeIds( false );
+        intitCounts();
+    }
+
+    private void intitCounts() {
+        setDomainsStoredSet( new TreeSet<DomainId>() );
+        setDomainsEncountered( 0 );
+        setProteinsEncountered( 0 );
+        setProteinsIgnoredDueToFilter( 0 );
+        setDomainsIgnoredDueToNegativeFilter( 0 );
+        setDomainsIgnoredDueToDuf( 0 );
+        setDomainsIgnoredDueToEval( 0 );
+        setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
+        setDomainsIgnoredDueToVirusLikeId( 0 );
+        setDomainsIgnoredDueToOverlap( 0 );
+        setDomainsStored( 0 );
+        setProteinsStored( 0 );
+        setTime( 0 );
+        setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
+        setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
+    }
+
+    private boolean isIgnoreDufs() {
+        return _ignore_dufs;
+    }
+
+    private boolean isIgnoreEngulfedDomains() {
+        return _ignore_engulfed_domains;
+    }
+
+    private boolean isIgnoreVirusLikeIds() {
+        return _ignore_virus_like_ids;
+    }
+
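+    // A minimal usage sketch (file name and species label are illustrative
+    // assumptions; INDIVIDUAL_SCORE_CUTOFF.NONE disables per-model cutoffs):
+    //
+    //   final HmmscanPerDomainTableParser parser =
+    //           new HmmscanPerDomainTableParser( new File( "hmmscan_domtblout.txt" ),
+    //                                            "NEMVE",
+    //                                            INDIVIDUAL_SCORE_CUTOFF.NONE );
+    //   final List<Protein> proteins = parser.parse();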
+    public List<Protein> parse() throws IOException {
+        if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE )
+                && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) {
+            throw new RuntimeException( "attempt to use individual cutoffs without having set them" );
+        }
+        intitCounts();
+        final Set<String> prev_queries = new HashSet<String>();
+        final String error = ForesterUtil.isReadableFile( getInputFile() );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            throw new IOException( error );
+        }
+        final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
+        String line;
+        final List<Protein> proteins = new ArrayList<Protein>();
+        Protein current_protein = null;
+        int line_number = 0;
+        final long start_time = new Date().getTime();
+        String prev_query = "";
+        int prev_qlen = -1;
+        while ( ( line = br.readLine() ) != null ) {
+            line_number++;
+            if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) {
+                continue;
+            }
+            // 0                    1           2    3                      4           5      6        7      8      9  10  11        12        13     14    15      16  17      18  19      20  21  22      
+            // #                                                                              --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord
+            // # target name        accession   tlen query name             accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target
+            // #------------------- ---------- -----   -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
+            // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   1   4   1.5e-41     3e-38  130.8  11.1     3   171   140   307   139   346 0.81 Ion transport protein
+            // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   2   4   9.1e-45   1.8e-41  141.3  13.1     4   200   479   664   476   665 0.97 Ion transport protein
+            // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   3   4   5.2e-45     1e-41  142.1  14.0     1   201   900  1117   900  1117 0.96 Ion transport protein
+            // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   4   4   9.2e-51   1.8e-47  160.9  11.3     1   201  1217  1423  1217  1423 0.97 Ion transport protein
+            // PKD_channel          PF08016.5    426 jgi|Nemve1|7|gw.28.1.1 -           1604   5.9e-19   67.4  70.5   1   8   0.00053       1.1    7.3   0.4   220   264   142   191   134   200 0.73 Polycystin cation channel
+            final String tokens[] = line.split( "\\s+" );
+            final String target_id = tokens[ 0 ];
+            final String target_acc = tokens[ 1 ];
+            final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" );
+            final String query = tokens[ 3 ];
+            final String query_acc = tokens[ 4 ];
+            final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" );
+            final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" );
+            final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" );
+            final int domain_number = parseInt( tokens[ 9 ], line_number, "count" );
+            final int total_domains = parseInt( tokens[ 10 ], line_number, "total" );
+            final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" );
+            final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" );
+            final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" );
+            final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" );
+            final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
+            final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" );
+            final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" );
+            final int env_from = parseInt( tokens[ 19 ], line_number, "env from" );
+            final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
+            ++_domains_encountered;
+            if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
+                if ( query.equals( prev_query ) ) {
+                    throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen + ", "
+                            + prev_qlen );
+                }
+                if ( prev_queries.contains( query ) ) {
+                    throw new IOException( "more than one protein named [" + query + "]" );
+                }
+                prev_query = query;
+                prev_qlen = qlen;
+                prev_queries.add( query );
+                if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
+                    addProtein( proteins, current_protein );
+                }
+                if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
+                    current_protein = new BasicProtein( query, getSpecies() );
+                }
+                else {
+                    throw new IllegalArgumentException( "unknown return type" );
+                }
+            }
+            boolean failed_cutoff = false;
+            if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) {
+                if ( getIndividualScoreCutoffs().containsKey( target_id ) ) {
+                    final double cutoff = getIndividualScoreCutoffs().get( target_id );
+                    if ( getIndividualCutoffAppliesTo() == INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) {
+                        if ( fs_score < cutoff ) {
+                            failed_cutoff = true;
+                        }
+                    }
+                    else if ( getIndividualCutoffAppliesTo() == INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) {
+                        if ( domain_score < cutoff ) {
+                            failed_cutoff = true;
+                        }
+                    }
+                }
+                else {
+                    throw new IOException( "could not find a score cutoff value for domain id \"" + target_id
+                            + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                }
+            }
+            final String uc_id = target_id.toUpperCase();
+            if ( failed_cutoff ) {
+                ++_domains_ignored_due_to_individual_score_cutoff;
+            }
+            else if ( ali_from == ali_to ) {
+                //Ignore
+            }
+            else if ( ( getEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
+                    && ( fs_e_value > getEValueMaximum() ) ) {
+                ++_domains_ignored_due_to_e_value;
+            }
+            else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
+                ++_domains_ignored_due_to_duf;
+            }
+            else if ( isIgnoreVirusLikeIds()
+                    && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
+                            || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
+                            || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) {
+                ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id );
+                ++_domains_ignored_due_to_virus_like_id;
+            }
+            else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
+                    && getFilter().contains( new DomainId( target_id ) ) ) {
+                ++_domains_ignored_due_to_negative_domain_filter;
+                ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id );
+            }
+            else {
+                try {
+                    final Domain pd = new BasicDomain( target_id,
+                                                       ali_from,
+                                                       ali_to,
+                                                       ( short ) domain_number,
+                                                       ( short ) total_domains,
+                                                       fs_e_value,
+                                                       fs_score,
+                                                       i_e_value,
+                                                       domain_score );
+                    current_protein.addProteinDomain( pd );
+                }
+                catch ( final IllegalArgumentException e ) {
+                    throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: "
+                            + e.getMessage() );
+                }
+                ++_domains_stored;
+            }
+        } // while ( ( line = br.readLine() ) != null )
+        if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
+            addProtein( proteins, current_protein );
+        }
+        setProteinsEncountered( prev_queries.size() );
+        setTime( new Date().getTime() - start_time );
+        return proteins;
+    }
+
+    private double parseDouble( final String double_str, final int line_number, final String label ) throws IOException {
+        double d = -1;
+        try {
+            d = Double.valueOf( double_str ).doubleValue();
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number
+                    + "] in [" + getInputFile().getCanonicalPath() + "]" );
+        }
+        return d;
+    }
+
+    private int parseInt( final String int_str, final int line_number, final String label ) throws IOException {
+        int i = -1;
+        try {
+            i = Integer.parseInt( int_str );
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "could not parse \"" + label + "\" from \"" + int_str + "\" [line " + line_number
+                    + "] in [" + getInputFile().getCanonicalPath() + "]" );
+        }
+        return i;
+    }
+
+    private void setDomainsEncountered( final int domains_encountered ) {
+        _domains_encountered = domains_encountered;
+    }
+
+    private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
+        _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
+    }
+
+    public void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
+        _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
+    }
+
+    public void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
+        _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
+    }
+
+    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
+        _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
+    }
+
+    private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
+        _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
+    }
+
+    private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
+        _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
+    }
+
+    private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
+        _domains_ignored_due_to_virus_like_id = i;
+    }
+
+    private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
+        _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
+    }
+
+    private void setDomainsStored( final int domains_stored ) {
+        _domains_stored = domains_stored;
+    }
+
+    private void setDomainsStoredSet( final SortedSet<DomainId> domains_stored_set ) {
+        _domains_stored_set = domains_stored_set;
+    }
+
+    public void setEValueMaximum( final double e_value_maximum ) {
+        if ( e_value_maximum < 0.0 ) {
+            throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
+        }
+        _e_value_maximum = e_value_maximum;
+    }
+
+    public void setIgnoreDufs( final boolean ignore_dufs ) {
+        _ignore_dufs = ignore_dufs;
+    }
+
+    /**
+     * Sets whether to ignore domains which are completely engulfed by domains
+     * (individual ones or stretches of overlapping ones) with better support
+     * values.
+     * 
+     * @param ignore_engulfed_domains
+     */
+    public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
+        _ignore_engulfed_domains = ignore_engulfed_domains;
+    }
+
+    public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
+        _ignore_virus_like_ids = ignore_virus_like_ids;
+    }
+
+    /**
+     * Sets the individual score cutoff values (for example, gathering
+     * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
+     * 
+     * @param individual_score_cutoffs
+     */
+    public void setIndividualScoreCutoffs( final Map<String, Double> individual_score_cutoffs ) {
+        _individual_score_cutoffs = individual_score_cutoffs;
+    }
+
+    public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
+        if ( max_allowed_overlap < 0 ) {
+            throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
+        }
+        _max_allowed_overlap = max_allowed_overlap;
+    }
+
+    private void setProteinsEncountered( final int proteins_encountered ) {
+        _proteins_encountered = proteins_encountered;
+    }
+
+    private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
+        _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
+    }
+
+    private void setProteinsStored( final int proteins_stored ) {
+        _proteins_stored = proteins_stored;
+    }
+
+    public void setReturnType( final ReturnType return_type ) {
+        _return_type = return_type;
+    }
+
+    private void setTime( final long time ) {
+        _time = time;
+    }
+
+    public static enum FilterType {
+        NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
+    }
+
+    public static enum INDIVIDUAL_SCORE_CUTOFF {
+        FULL_SEQUENCE, DOMAIN, NONE
+    }
+
+    public static enum ReturnType {
+        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
+    }
+}
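
The public setters above control how hmmscan hits are filtered before domains are stored. Below is a minimal configuration sketch, not part of this commit: it assumes an already constructed HmmscanPerDomainTableParser (its constructor appears earlier in this commit), assumes the class lives in the org.forester.io.parsers package alongside the other parsers, and uses hypothetical domain ids and cutoff values.

import java.util.HashMap;
import java.util.Map;

import org.forester.io.parsers.HmmscanPerDomainTableParser;

public final class HmmscanParserConfigSketch {

    // Configures an already constructed parser; every setter called below is a
    // public method added in this commit. Cutoff and threshold values are
    // hypothetical.
    public static void configure( final HmmscanPerDomainTableParser parser ) {
        final Map<String, Double> cutoffs = new HashMap<String, Double>();
        cutoffs.put( "Ion_trans", 21.1 );   // hypothetical per-domain score cutoff
        cutoffs.put( "PKD_channel", 21.1 ); // hypothetical per-domain score cutoff
        parser.setIndividualScoreCutoffs( cutoffs );
        parser.setEValueMaximum( 1e-5 );
        parser.setIgnoreDufs( true );
        parser.setIgnoreVirusLikeIds( true );
        parser.setMaxAllowedOverlap( 5 );
        parser.setReturnType( HmmscanPerDomainTableParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
    }
}
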
diff --git a/forester/java/src/org/forester/io/parsers/PhylogenyParser.java b/forester/java/src/org/forester/io/parsers/PhylogenyParser.java
new file mode 100644 (file)
index 0000000..d319d96
--- /dev/null
@@ -0,0 +1,44 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers;
+
+import java.io.IOException;
+
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+
+/*
+ * @author Christian Zmasek
+ */
+public interface PhylogenyParser {
+
+    public Phylogeny[] parse() throws IOException;
+
+    public void setSource( Object source ) throws PhylogenyParserException, IOException;
+}
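
The interface captures the two-step contract shared by the parsers in this commit: hand the parser a source object, then ask it for the phylogenies it found. A bare-bones skeleton is sketched below only to illustrate that contract; the class name is made up, and real implementations such as the Nexus and NHX parsers follow later in this commit.

import java.io.IOException;

import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.util.PhylogenyParserException;
import org.forester.phylogeny.Phylogeny;

public class PhylogenyParserSkeleton implements PhylogenyParser {

    private Object _source;

    public void setSource( final Object source ) throws PhylogenyParserException, IOException {
        if ( source == null ) {
            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
        }
        _source = source;
    }

    public Phylogeny[] parse() throws IOException {
        // A real implementation would read _source here; this skeleton
        // simply returns an empty result.
        return new Phylogeny[ 0 ];
    }
}
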
diff --git a/forester/java/src/org/forester/io/parsers/SymmetricalDistanceMatrixParser.java b/forester/java/src/org/forester/io/parsers/SymmetricalDistanceMatrixParser.java
new file mode 100644 (file)
index 0000000..b9df246
--- /dev/null
@@ -0,0 +1,196 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
+import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.util.BasicTable;
+import org.forester.util.BasicTableParser;
+import org.forester.util.ForesterUtil;
+
+/*
+ * This can read full, lower triangular, and upper triangular distance matrices.
+ * In the case of a full matrix, the lower triangular values are used. Format
+ * (by example):
+ * 
+ * id1 0
+ * id2 0.3 0
+ * id3 0.4 0.4 0
+ * 
+ * OR
+ * 
+ * id1
+ * id2 0.3
+ * id3 0.4 0.4
+ * 
+ * Numbers before or after the data are ignored.
+ * 
+ * @author Christian M Zmasek
+ */
+public class SymmetricalDistanceMatrixParser {
+
+    private final static InputMatrixType INPUT_MATRIX_TYPE_DEFAULT = InputMatrixType.LOWER_TRIANGLE;
+    private final static String          COMMENT                   = "#";
+    private final static String          VALUE_SEPARATOR           = " ";
+    private int                          _matrix_size;
+    private InputMatrixType              _input_matrix_type;
+
+    private SymmetricalDistanceMatrixParser() {
+        init();
+    }
+
+    private void checkValueIsZero( final BasicTable<String> table, final int row, final int i, final int start_row )
+            throws IOException {
+        double d = 0.0;
+        final String table_value = table.getValue( i, row + start_row );
+        if ( ForesterUtil.isEmpty( table_value ) ) {
+            throw new IOException( "value is null or empty at [" + ( i - 1 ) + ", " + row + "]" );
+        }
+        try {
+            d = Double.parseDouble( table_value );
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "illegal format for distance [" + table_value + "] at [" + ( i - 1 ) + ", " + row
+                    + "]" );
+        }
+        if ( !ForesterUtil.isEqual( 0.0, d ) ) {
+            throw new IOException( "attempt to use non-zero diagonal value [" + table_value + "] at [" + ( i - 1 )
+                    + ", " + row + "]" );
+        }
+    }
+
+    private InputMatrixType getInputMatrixType() {
+        return _input_matrix_type;
+    }
+
+    private int getMatrixSize() {
+        return _matrix_size;
+    }
+
+    private void init() {
+        setInputMatrixType( INPUT_MATRIX_TYPE_DEFAULT );
+        reset();
+    }
+
+    public DistanceMatrix[] parse( final Object source ) throws IOException {
+        reset();
+        final List<BasicTable<String>> tables = BasicTableParser.parse( source, VALUE_SEPARATOR, false, COMMENT, true );
+        final DistanceMatrix[] distance_matrices = new DistanceMatrix[ tables.size() ];
+        int i = 0;
+        for( final BasicTable<String> table : tables ) {
+            distance_matrices[ i++ ] = transform( table );
+        }
+        return distance_matrices;
+    }
+
+    private void reset() {
+        setMatrixSize( -1 );
+    }
+
+    public void setInputMatrixType( final InputMatrixType input_matrix_type ) {
+        _input_matrix_type = input_matrix_type;
+    }
+
+    private void setMatrixSize( final int matrix_size ) {
+        _matrix_size = matrix_size;
+    }
+
+    private void transferValue( final BasicTable<String> table,
+                                final DistanceMatrix distance_matrix,
+                                final int row,
+                                final int col,
+                                final int start_row,
+                                final int col_offset ) throws IOException {
+        double d = 0.0;
+        final String table_value = table.getValue( col, row + start_row );
+        if ( ForesterUtil.isEmpty( table_value ) ) {
+            throw new IOException( "value is null or empty at [" + ( col - 1 ) + ", " + row + "]" );
+        }
+        try {
+            d = Double.parseDouble( table_value );
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "illegal format for distance [" + table_value + "] at [" + ( col - 1 ) + ", " + row
+                    + "]" );
+        }
+        distance_matrix.setValue( col - 1 + col_offset, row, d );
+    }
+
+    private DistanceMatrix transform( final BasicTable<String> table ) throws IllegalArgumentException, IOException {
+        boolean first_line_is_size = false;
+        if ( table.getNumberOfColumns() < 3 ) {
+            throw new IllegalArgumentException( "attempt to create distance matrix with with less than 3 columns [columns: "
+                    + table.getNumberOfColumns() + ", rows: " + table.getNumberOfRows() + "]" );
+        }
+        if ( table.getNumberOfColumns() == table.getNumberOfRows() ) {
+            first_line_is_size = true;
+        }
+        else if ( table.getNumberOfColumns() != table.getNumberOfRows() + 1 ) {
+            throw new IllegalArgumentException( "attempt to create distance matrix with illegal dimensions [columns: "
+                    + table.getNumberOfColumns() + ", rows: " + table.getNumberOfRows() + "]" );
+        }
+        final DistanceMatrix distance_matrix = new BasicSymmetricalDistanceMatrix( table.getNumberOfColumns() - 1 );
+        int start_row = 0;
+        if ( first_line_is_size ) {
+            start_row = 1;
+        }
+        for( int row = 0; row < table.getNumberOfRows() - start_row; row++ ) {
+            distance_matrix.setIdentifier( row, table.getValue( 0, row + start_row ) );
+            switch ( getInputMatrixType() ) {
+                case LOWER_TRIANGLE:
+                    for( int col = 1; col <= row; ++col ) {
+                        transferValue( table, distance_matrix, row, col, start_row, 0 );
+                    }
+                    checkValueIsZero( table, row, row + 1, start_row );
+                    break;
+                case UPPER_TRIANGLE:
+                    for( int col = 1; col < ( table.getNumberOfColumns() - row ); ++col ) {
+                        transferValue( table, distance_matrix, row, col, start_row, row );
+                    }
+                    break;
+                default:
+                    throw new AssertionError( "unkwnown input matrix type [" + getInputMatrixType() + "]" );
+            }
+        }
+        if ( getMatrixSize() < 1 ) {
+            setMatrixSize( distance_matrix.getSize() );
+        }
+        else if ( getMatrixSize() != distance_matrix.getSize() ) {
+            throw new IOException( "attempt to use matrices of unequal size: [" + getMatrixSize() + "] vs ["
+                    + distance_matrix.getSize() + "]" );
+        }
+        return distance_matrix;
+    }
+
+    public static SymmetricalDistanceMatrixParser createInstance() {
+        return new SymmetricalDistanceMatrixParser();
+    }
+
+    public enum InputMatrixType {
+        UPPER_TRIANGLE, LOWER_TRIANGLE
+    }
+}
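
A usage sketch for this parser follows; it is not part of this commit. The file name is hypothetical, and it is assumed that the underlying BasicTableParser accepts a java.io.File as source, as the other parsers in this commit do.

import java.io.File;

import org.forester.evoinference.matrix.distance.DistanceMatrix;
import org.forester.io.parsers.SymmetricalDistanceMatrixParser;

public final class DistanceMatrixParseSketch {

    public static void main( final String[] args ) throws Exception {
        // "pairwise_dists.txt" is a hypothetical lower-triangle matrix file
        // in the format described in the class comment above.
        final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance();
        parser.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.LOWER_TRIANGLE );
        final DistanceMatrix[] matrices = parser.parse( new File( "pairwise_dists.txt" ) );
        System.out.println( "matrices read: " + matrices.length
                + ", size of first: " + matrices[ 0 ].getSize() );
    }
}
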
diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusBinaryStatesMatrixParser.java
new file mode 100644 (file)
index 0000000..a9d9d3d
--- /dev/null
@@ -0,0 +1,167 @@
+// $Id:
+// Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2009-2010 Christian M. Zmasek
+// Copyright (C) 2009-2010 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester/
+
+package org.forester.io.parsers.nexus;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+
+import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
+import org.forester.io.parsers.util.ParserUtils;
+import org.forester.io.parsers.util.PhylogenyParserException;
+
+public class NexusBinaryStatesMatrixParser {
+
+    private Object                             _nexus_source;
+    private CharacterStateMatrix<BinaryStates> _matrix;
+    private int                                _nchar;
+    private int                                _ntax;
+
+    public CharacterStateMatrix<BinaryStates> getMatrix() {
+        return _matrix;
+    }
+
+    public int getNChar() {
+        return _nchar;
+    }
+
+    private Object getNexusSource() {
+        return _nexus_source;
+    }
+
+    public int getNTax() {
+        return _ntax;
+    }
+
+    public void parse() throws IOException {
+        reset();
+        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        String line;
+        boolean in_matrix = false;
+        int identifier_index = 0;
+        int max_character_index = -1;
+        while ( ( line = reader.readLine() ) != null ) {
+            line = line.trim();
+            if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
+                if ( line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() ) >= 0 ) {
+                    final int i = line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() );
+                    String s = line.toLowerCase().substring( i + 6 );
+                    s = s.replace( ';', ' ' ).trim();
+                    setNChar( Integer.parseInt( s ) );
+                }
+                else if ( line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() ) >= 0 ) {
+                    final int i = line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() );
+                    String s = line.toLowerCase().substring( i + 5 );
+                    s = s.replace( ';', ' ' ).trim();
+                    setNTax( Integer.parseInt( s ) );
+                }
+                else if ( line.toLowerCase().startsWith( NexusConstants.MATRIX.toLowerCase() ) ) {
+                    in_matrix = true;
+                    if ( getNTax() < 1 ) {
+                        throw new NexusFormatException( "did not encounter " + NexusConstants.NTAX );
+                    }
+                    if ( getNChar() < 1 ) {
+                        throw new NexusFormatException( "did not encounter " + NexusConstants.NCHAR );
+                    }
+                    if ( getMatrix() != null ) {
+                        throw new NexusFormatException( "more than one matrix present" );
+                    }
+                    setMatrix( new BasicCharacterStateMatrix<BinaryStates>( getNTax(), getNChar() ) );
+                }
+                else if ( line.toLowerCase().startsWith( NexusConstants.END.toLowerCase() ) ) {
+                    in_matrix = false;
+                }
+                else if ( in_matrix ) {
+                    final String[] line_ary = line.split( "\\s+" );
+                    final String label = line_ary[ 0 ].trim();
+                    String states_str = line_ary[ 1 ].trim();
+                    if ( states_str.endsWith( ";" ) ) {
+                        in_matrix = false;
+                        states_str = states_str.substring( 0, states_str.length() - 1 );
+                    }
+                    final char[] states = states_str.toCharArray();
+                    getMatrix().setIdentifier( identifier_index, label );
+                    int character_index = 0;
+                    for( final char state : states ) {
+                        if ( state == BinaryStates.PRESENT.toChar() ) {
+                            try {
+                                getMatrix().setState( identifier_index, character_index, BinaryStates.PRESENT );
+                            }
+                            catch ( final ArrayIndexOutOfBoundsException ex ) {
+                                throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" );
+                            }
+                        }
+                        else if ( state == BinaryStates.ABSENT.toChar() ) {
+                            try {
+                                getMatrix().setState( identifier_index, character_index, BinaryStates.ABSENT );
+                            }
+                            catch ( final ArrayIndexOutOfBoundsException ex ) {
+                                throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" );
+                            }
+                        }
+                        else {
+                            throw new NexusFormatException( "illegal state " + state );
+                        }
+                        ++character_index;
+                    }
+                    if ( ( max_character_index > 0 ) && ( max_character_index != character_index ) ) {
+                        throw new NexusFormatException( "unequal number of characters at line " + line );
+                    }
+                    max_character_index = character_index;
+                    ++identifier_index;
+                }
+            }
+        }
+    }
+
+    private void reset() {
+        setMatrix( null );
+        setNChar( -1 );
+        setNTax( -1 );
+    }
+
+    private void setMatrix( final CharacterStateMatrix<BinaryStates> matrix ) {
+        _matrix = matrix;
+    }
+
+    private void setNChar( final int nchar ) {
+        _nchar = nchar;
+    }
+
+    private void setNTax( final int ntax ) {
+        _ntax = ntax;
+    }
+
+    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
+        if ( nexus_source == null ) {
+            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
+        }
+        _nexus_source = nexus_source;
+    }
+}
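
A usage sketch, not part of this commit: the NEXUS file name is hypothetical, and the source object is assumed to be a java.io.File accepted by ParserUtils.createReader.

import java.io.File;

import org.forester.evoinference.matrix.character.CharacterStateMatrix;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
import org.forester.io.parsers.nexus.NexusBinaryStatesMatrixParser;

public final class BinaryMatrixParseSketch {

    public static void main( final String[] args ) throws Exception {
        // "gains_losses.nex" is a hypothetical NEXUS file with NTax/NChar
        // declarations and a 0/1 Matrix block, as expected by the parser above.
        final NexusBinaryStatesMatrixParser parser = new NexusBinaryStatesMatrixParser();
        parser.setSource( new File( "gains_losses.nex" ) );
        parser.parse();
        final CharacterStateMatrix<BinaryStates> matrix = parser.getMatrix();
        System.out.println( "parsed " + parser.getNTax() + " taxa x " + parser.getNChar()
                + " characters; matrix present: " + ( matrix != null ) );
    }
}
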
diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusCharactersParser.java
new file mode 100644 (file)
index 0000000..5d0ed60
--- /dev/null
@@ -0,0 +1,117 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.nexus;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.util.ParserUtils;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.util.ForesterUtil;
+
+public class NexusCharactersParser {
+
+    final private static String charstatelabels = NexusConstants.CHARSTATELABELS.toLowerCase();
+    private Object              _nexus_source;
+    private String[]            _char_state_labels;
+
+    public String[] getCharStateLabels() {
+        return _char_state_labels;
+    }
+
+    private Object getNexusSource() {
+        return _nexus_source;
+    }
+
+    public void parse() throws IOException {
+        reset();
+        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        String line;
+        boolean in_charstatelabels = false;
+        final List<String> labels_list = new ArrayList<String>();
+        int counter = 1;
+        while ( ( line = reader.readLine() ) != null ) {
+            line = line.trim();
+            if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
+                if ( line.toLowerCase().startsWith( charstatelabels ) ) {
+                    in_charstatelabels = true;
+                }
+                else if ( in_charstatelabels ) {
+                    String label = line;
+                    if ( label.indexOf( ' ' ) > 0 ) {
+                        final String[] s = label.split( "\\s+" );
+                        label = s[ 1 ];
+                        int count = -1;
+                        try {
+                            count = Integer.parseInt( s[ 0 ] );
+                        }
+                        catch ( final NumberFormatException ex ) {
+                            throw new NexusFormatException( "failed to parse character label number from: " + line );
+                        }
+                        if ( count != counter ) {
+                            throw new NexusFormatException( "character label numbers are not in order, current line: "
+                                    + line );
+                        }
+                    }
+                    ++counter;
+                    label = label.replaceAll( "[\\s;\"',]+", "" );
+                    if ( !ForesterUtil.isEmpty( label ) ) {
+                        if ( labels_list.contains( label ) ) {
+                            throw new NexusFormatException( "character label [" + label + "] is not unique" );
+                        }
+                        labels_list.add( label );
+                    }
+                }
+                if ( line.endsWith( ";" ) ) {
+                    in_charstatelabels = false;
+                }
+            }
+        }
+        setCharStateLabels( new String[ labels_list.size() ] );
+        int i = 0;
+        for( final String label : labels_list ) {
+            getCharStateLabels()[ i++ ] = label;
+        }
+    }
+
+    private void reset() {
+        setCharStateLabels( new String[ 0 ] );
+    }
+
+    private void setCharStateLabels( final String[] char_state_labels ) {
+        _char_state_labels = char_state_labels;
+    }
+
+    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
+        if ( nexus_source == null ) {
+            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
+        }
+        _nexus_source = nexus_source;
+    }
+}
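
Another usage sketch, not part of this commit, with a hypothetical file name; it prints the character state labels collected from a CharStateLabels list.

import java.io.File;

import org.forester.io.parsers.nexus.NexusCharactersParser;

public final class CharStateLabelsSketch {

    public static void main( final String[] args ) throws Exception {
        // "domains.nex" is a hypothetical NEXUS file containing a CharStateLabels list.
        final NexusCharactersParser parser = new NexusCharactersParser();
        parser.setSource( new File( "domains.nex" ) );
        parser.parse();
        for( final String label : parser.getCharStateLabels() ) {
            System.out.println( label );
        }
    }
}
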
diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusConstants.java b/forester/java/src/org/forester/io/parsers/nexus/NexusConstants.java
new file mode 100644 (file)
index 0000000..67512d7
--- /dev/null
@@ -0,0 +1,48 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.nexus;
+
+public final class NexusConstants {
+
+    public final static String NEXUS            = "#NEXUS";
+    public final static String BEGIN_TAXA       = "Begin Taxa;";
+    public final static String BEGIN_TREES      = "Begin Trees;";
+    public final static String TREE             = "Tree";
+    public final static String DIMENSIONS       = "Dimensions";
+    public final static String NTAX             = "NTax";
+    public final static String NCHAR            = "NChar";
+    public final static String TAXLABELS        = "TaxLabels";
+    public final static String CHARSTATELABELS  = "CharStateLabels";
+    public final static String END              = "End;";
+    public final static String MATRIX           = "Matrix";
+    public final static String BEGIN_CHARACTERS = "Begin Characters;";
+    public final static String FORMAT           = "Format";
+    public final static String DATATYPE         = "DataType";
+    public final static String STANDARD         = "Standard";
+    public final static String SYMBOLS          = "Symbols";
+    public static final String TRANSLATE        = "Translate";
+    public static final String UTREE            = "UTREE";
+}
diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusFormatException.java b/forester/java/src/org/forester/io/parsers/nexus/NexusFormatException.java
new file mode 100644 (file)
index 0000000..7e20bee
--- /dev/null
@@ -0,0 +1,41 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.nexus;
+
+import java.io.IOException;
+
+public class NexusFormatException extends IOException {
+
+    private static final long serialVersionUID = -8750474393398183410L;
+
+    public NexusFormatException() {
+        super();
+    }
+
+    public NexusFormatException( final String message ) {
+        super( message );
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
new file mode 100644 (file)
index 0000000..879a0e1
--- /dev/null
@@ -0,0 +1,338 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.nexus;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.archaeopteryx.Constants;
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nhx.NHXFormatException;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.util.ParserUtils;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+public class NexusPhylogeniesParser implements PhylogenyParser {
+
+    final private static String  begin_trees               = NexusConstants.BEGIN_TREES.toLowerCase();
+    final private static String  taxlabels                 = NexusConstants.TAXLABELS.toLowerCase();
+    final private static String  translate                 = NexusConstants.TRANSLATE.toLowerCase();
+    final private static String  tree                      = NexusConstants.TREE.toLowerCase();
+    final private static String  utree                     = NexusConstants.UTREE.toLowerCase();
+    final private static String  end                       = NexusConstants.END.toLowerCase();
+    final private static String  endblock                  = "endblock";
+    final private static Pattern TREE_NAME_PATTERN         = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+",
+                                                                              Pattern.CASE_INSENSITIVE );
+    final private static Pattern ROOTEDNESS_PATTERN        = Pattern.compile( ".+=\\s*\\[&([RU])\\].*" );
+    private Object               _nexus_source;
+    private List<Phylogeny>      _phylogenies;
+    private List<String>         _taxlabels;
+    private Map<String, String>  _translate_map;
+    private boolean              _replace_underscores      = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
+    private boolean              _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
+
+    private void createPhylogeny( final String name,
+                                  final StringBuffer nhx,
+                                  final boolean rooted_info_present,
+                                  final boolean is_rooted ) throws IOException {
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final NHXParser pars = new NHXParser();
+        pars.setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO );
+        pars.setReplaceUnderscores( isReplaceUnderscores() );
+        pars.setIgnoreQuotes( isIgnoreQuotes() );
+        if ( rooted_info_present ) {
+            pars.setGuessRootedness( false );
+        }
+        final Phylogeny p = factory.create( nhx, pars )[ 0 ];
+        p.setName( name );
+        if ( rooted_info_present ) {
+            p.setRooted( is_rooted );
+        }
+        if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) {
+            final PhylogenyNodeIterator it = p.iteratorExternalForward();
+            while ( it.hasNext() ) {
+                final PhylogenyNode node = it.next();
+                if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) {
+                    node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) );
+                }
+                else if ( getTaxlabels().size() > 0 ) {
+                    int i = -1;
+                    try {
+                        i = Integer.parseInt( node.getName() );
+                    }
+                    catch ( final NumberFormatException e ) {
+                        // Ignore.
+                    }
+                    if ( i > 0 ) {
+                        node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) );
+                    }
+                }
+            }
+        }
+        getPhylogenies().add( p );
+    }
+
+    private Object getNexusSource() {
+        return _nexus_source;
+    }
+
+    private List<Phylogeny> getPhylogenies() {
+        return _phylogenies;
+    }
+
+    private Phylogeny[] getPhylogeniesAsArray() {
+        final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ];
+        for( int i = 0; i < getPhylogenies().size(); ++i ) {
+            p[ i ] = getPhylogenies().get( i );
+        }
+        return p;
+    }
+
+    private List<String> getTaxlabels() {
+        return _taxlabels;
+    }
+
+    private Map<String, String> getTranslateMap() {
+        return _translate_map;
+    }
+
+    private boolean isIgnoreQuotes() {
+        return _ignore_quotes_in_nh_data;
+    }
+
+    private boolean isReplaceUnderscores() {
+        return _replace_underscores;
+    }
+
+    public Phylogeny[] parse() throws IOException, NHXFormatException {
+        reset();
+        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        String line;
+        String name = "";
+        StringBuffer nhx = new StringBuffer();
+        final StringBuffer translate_sb = new StringBuffer();
+        boolean in_trees_block = false;
+        boolean in_taxalabels = false;
+        boolean in_translate = false;
+        final boolean in_comment = false;
+        boolean in_tree = false;
+        boolean rooted_info_present = false;
+        boolean is_rooted = false;
+        while ( ( line = reader.readLine() ) != null ) {
+            line = line.trim();
+            if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
+                line = ForesterUtil.collapseWhiteSpace( line );
+                line = removeWhiteSpaceBeforeSemicolon( line );
+                final String line_lc = line.toLowerCase();
+                if ( line_lc.startsWith( begin_trees ) ) {
+                    in_trees_block = true;
+                    in_taxalabels = false;
+                    in_translate = false;
+                }
+                else if ( line_lc.startsWith( taxlabels ) ) {
+                    in_trees_block = false;
+                    in_taxalabels = true;
+                    in_translate = false;
+                }
+                else if ( line_lc.startsWith( translate ) ) {
+                    in_taxalabels = false;
+                    in_translate = true;
+                }
+                else if ( in_trees_block ) {
+                    //FIXME TODO need to work on this "title" and "link"
+                    if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) {
+                        // Do nothing.
+                    }
+                    else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
+                        in_trees_block = false;
+                        in_tree = false;
+                        in_translate = false;
+                        if ( nhx.length() > 0 ) {
+                            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
+                            nhx = new StringBuffer();
+                            name = "";
+                            rooted_info_present = false;
+                            is_rooted = false;
+                        }
+                    }
+                    else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) {
+                        if ( nhx.length() > 0 ) {
+                            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
+                            nhx = new StringBuffer();
+                            name = "";
+                            rooted_info_present = false;
+                            is_rooted = false;
+                        }
+                        in_tree = true;
+                        nhx.append( line.substring( line.indexOf( '=' ) ) );
+                        final Matcher name_matcher = TREE_NAME_PATTERN.matcher( line );
+                        if ( name_matcher.matches() ) {
+                            name = name_matcher.group( 1 );
+                            name = name.replaceAll( "['\"]+", "" );
+                        }
+                        final Matcher rootedness_matcher = ROOTEDNESS_PATTERN.matcher( line );
+                        if ( rootedness_matcher.matches() ) {
+                            final String s = rootedness_matcher.group( 1 );
+                            line = line.replaceAll( "\\[\\&.\\]", "" );
+                            rooted_info_present = true;
+                            if ( s.toUpperCase().equals( "R" ) ) {
+                                is_rooted = true;
+                            }
+                        }
+                    }
+                    else if ( in_tree && !in_translate ) {
+                        nhx.append( line );
+                    }
+                    if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !in_translate
+                            && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) {
+                        in_tree = false;
+                        in_translate = false;
+                        createPhylogeny( name, nhx, rooted_info_present, is_rooted );
+                        nhx = new StringBuffer();
+                        name = "";
+                        rooted_info_present = false;
+                        is_rooted = false;
+                    }
+                }
+                if ( in_taxalabels ) {
+                    if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
+                        in_taxalabels = false;
+                    }
+                    else {
+                        final String[] labels = line.split( "\\s+" );
+                        for( String label : labels ) {
+                            if ( !label.toLowerCase().equals( taxlabels ) ) {
+                                if ( label.endsWith( ";" ) ) {
+                                    in_taxalabels = false;
+                                    label = label.substring( 0, label.length() - 1 );
+                                }
+                                if ( label.length() > 0 ) {
+                                    getTaxlabels().add( label );
+                                }
+                            }
+                        }
+                    }
+                }
+                if ( in_translate ) {
+                    if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
+                        in_translate = false;
+                    }
+                    else {
+                        translate_sb.append( " " );
+                        translate_sb.append( line.trim() );
+                        if ( line.endsWith( ";" ) ) {
+                            in_translate = false;
+                            setTranslateKeyValuePairs( translate_sb );
+                        }
+                    }
+                }
+            }
+        }
+        if ( nhx.length() > 0 ) {
+            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
+        }
+        return getPhylogeniesAsArray();
+    }
+
+    private void reset() {
+        setPhylogenies( new ArrayList<Phylogeny>() );
+        setTaxlabels( new ArrayList<String>() );
+        setTranslateMap( new HashMap<String, String>() );
+    }
+
+    public void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) {
+        _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data;
+    }
+
+    private void setPhylogenies( final ArrayList<Phylogeny> phylogenies ) {
+        _phylogenies = phylogenies;
+    }
+
+    public void setReplaceUnderscores( final boolean replace_underscores ) {
+        _replace_underscores = replace_underscores;
+    }
+
+    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
+        if ( nexus_source == null ) {
+            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
+        }
+        _nexus_source = nexus_source;
+    }
+
+    private void setTaxlabels( final List<String> taxlabels ) {
+        _taxlabels = taxlabels;
+    }
+
+    private void setTranslateKeyValuePairs( final StringBuffer translate_sb ) throws IOException {
+        String s = translate_sb.toString().trim();
+        if ( s.endsWith( ";" ) ) {
+            s = s.substring( 0, s.length() - 1 ).trim();
+        }
+        for( final String pair : s.split( "," ) ) {
+            final String[] kv = pair.trim().split( "\\s+" );
+            if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) {
+                throw new IOException( "ill formatted translate values: " + translate_sb );
+            }
+            if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) {
+                throw new IOException( "ill formatted translate values: " + translate_sb );
+            }
+            String key = "";
+            String value = "";
+            if ( kv.length == 3 ) {
+                key = kv[ 1 ];
+                value = kv[ 2 ];
+            }
+            else {
+                key = kv[ 0 ];
+                value = kv[ 1 ];
+            }
+            if ( value.endsWith( ";" ) ) {
+                value = value.substring( 0, value.length() - 1 );
+            }
+            getTranslateMap().put( key, value );
+        }
+    }
+
+    private void setTranslateMap( final Map<String, String> translate_map ) {
+        _translate_map = translate_map;
+    }
+
+    private static String removeWhiteSpaceBeforeSemicolon( final String s ) {
+        return s.replaceAll( "\\s+;", ";" );
+    }
+}
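
A usage sketch, not part of this commit: the NEXUS file name is hypothetical, and the parser is driven through the PhylogenyParser contract shown earlier in this commit.

import java.io.File;

import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.phylogeny.Phylogeny;

public final class NexusTreesReadSketch {

    public static void main( final String[] args ) throws Exception {
        // "trees.nex" is a hypothetical NEXUS file with a Trees block,
        // optionally preceded by TaxLabels and/or a Translate table.
        final NexusPhylogeniesParser parser = new NexusPhylogeniesParser();
        parser.setReplaceUnderscores( true );
        parser.setSource( new File( "trees.nex" ) );
        final Phylogeny[] trees = parser.parse();
        System.out.println( "phylogenies read: " + trees.length );
    }
}
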
diff --git a/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java b/forester/java/src/org/forester/io/parsers/nexus/PaupLogParser.java
new file mode 100644 (file)
index 0000000..c4243b4
--- /dev/null
@@ -0,0 +1,128 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.nexus;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
+import org.forester.io.parsers.util.ParserUtils;
+import org.forester.io.parsers.util.PhylogenyParserException;
+
+public class PaupLogParser {
+
+    private static final String DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES = "data matrix and reconstructed states for internal nodes";
+    private Object              _nexus_source;
+
+    private Object getNexusSource() {
+        return _nexus_source;
+    }
+
+    public CharacterStateMatrix<BinaryStates> parse() throws IOException {
+        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        String line;
+        boolean saw_line = false;
+        int identifier_index = 0;
+        boolean first_block = true;
+        boolean saw_data_matrix_line = false;
+        final List<String> identifiers = new ArrayList<String>();
+        final List<List<BinaryStates>> states = new ArrayList<List<BinaryStates>>();
+        boolean done = false;
+        while ( ( ( line = reader.readLine() ) != null ) && !done ) {
+            line = line.trim();
+            if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
+                if ( ( ( identifier_index > 0 ) && line.startsWith( "Tree " ) )
+                        || line.startsWith( "Character change list" ) ) {
+                    done = true;
+                    continue;
+                }
+                if ( line.toLowerCase().startsWith( DATA_MATRIX_AND_RECONSTRUCTED_STATES_FOR_INTERNAL_NODES ) ) {
+                    saw_line = false;
+                    saw_data_matrix_line = true;
+                    identifier_index = 0;
+                    if ( first_block && ( line.indexOf( "continued" ) > 0 ) ) {
+                        first_block = false;
+                    }
+                }
+                if ( saw_data_matrix_line && line.startsWith( "----------" ) ) {
+                    saw_line = true;
+                }
+                else if ( saw_line && ( line.indexOf( ' ' ) > 0 ) ) {
+                    final String[] s = line.split( "\\s+" );
+                    if ( s.length != 2 ) {
+                        throw new NexusFormatException( "unexpected format at line: " + line );
+                    }
+                    final String identifier = s[ 0 ];
+                    final String row = s[ 1 ];
+                    if ( first_block ) {
+                        if ( identifiers.contains( identifier ) ) {
+                            throw new NexusFormatException( "identifier [" + identifier + "] is not unique in line: "
+                                    + line );
+                        }
+                        identifiers.add( identifier );
+                        states.add( new ArrayList<BinaryStates>() );
+                    }
+                    else {
+                        if ( !identifiers.contains( identifier ) ) {
+                            throw new NexusFormatException( "new identifier [" + identifier + "] at line: " + line );
+                        }
+                    }
+                    for( int c = 0; c < row.length(); ++c ) {
+                        final char ch = row.charAt( c );
+                        if ( ch == '0' ) {
+                            states.get( identifier_index ).add( BinaryStates.ABSENT );
+                        }
+                        else if ( ch == '1' ) {
+                            states.get( identifier_index ).add( BinaryStates.PRESENT );
+                        }
+                        else {
+                            throw new NexusFormatException( "unknown character state [" + ch + "] at line: " + line );
+                        }
+                    }
+                    ++identifier_index;
+                }
+            }
+        }
+        final CharacterStateMatrix<BinaryStates> matrix = new BasicCharacterStateMatrix<BinaryStates>( states );
+        int i = 0;
+        for( final String identifier : identifiers ) {
+            matrix.setIdentifier( i++, identifier );
+        }
+        return matrix;
+    }
+
+    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
+        if ( nexus_source == null ) {
+            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
+        }
+        _nexus_source = nexus_source;
+    }
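+    // A minimal usage sketch (the file name is hypothetical, and it is assumed
+    // here that ParserUtils.createReader() accepts a File source):
+    //
+    //   final PaupLogParser parser = new PaupLogParser();
+    //   parser.setSource( new File( "paup.log" ) );
+    //   final CharacterStateMatrix<BinaryStates> matrix = parser.parse();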
+}
diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXFormatException.java b/forester/java/src/org/forester/io/parsers/nhx/NHXFormatException.java
new file mode 100644 (file)
index 0000000..b4c18cf
--- /dev/null
@@ -0,0 +1,41 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.nhx;
+
+import java.io.IOException;
+
+public class NHXFormatException extends IOException {
+
+    private static final long serialVersionUID = 3756209394438250170L;
+
+    public NHXFormatException() {
+        super();
+    }
+
+    public NHXFormatException( final String message ) {
+        super( message );
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
new file mode 100644 (file)
index 0000000..8fa5b29
--- /dev/null
@@ -0,0 +1,797 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.nhx;
+
+import java.awt.Color;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
+import org.forester.phylogeny.data.DomainArchitecture;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.PropertiesMap;
+import org.forester.phylogeny.data.Property;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+public final class NHXParser implements PhylogenyParser {
+
+    public static final boolean                          LIMIT_SPECIES_NAMES_TO_FIVE_CHARS = true;
+    public static final ForesterUtil.TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT       = ForesterUtil.TAXONOMY_EXTRACTION.NO;
+    final static private boolean                         GUESS_ROOTEDNESS_DEFAULT          = true;
+    final static private boolean                         GUESS_IF_SUPPORT_VALUES           = true;
+    final static private boolean                         IGNORE_QUOTES_DEFAULT             = false;
+    final static public boolean                          REPLACE_UNDERSCORES_DEFAULT       = false;
+    private boolean                                      _saw_closing_paren;
+    final static private byte                            STRING                            = 0;
+    final static private byte                            STRING_BUFFER                     = 1;
+    final static private byte                            CHAR_ARRAY                        = 2;
+    final static private byte                            BUFFERED_READER                   = 3;
+    private boolean                                      _guess_rootedness;
+    private boolean                                      _has_next;
+    private boolean                                      _ignore_quotes;
+    private byte                                         _input_type;
+    private int                                          _source_length;
+    private PhylogenyNode                                _current_node;
+    private StringBuilder                                _current_anotation;
+    private Object                                       _nhx_source;
+    private int                                          _clade_level;
+    private List<Phylogeny>                              _phylogenies;
+    private Phylogeny                                    _current_phylogeny;
+    private ForesterUtil.TAXONOMY_EXTRACTION             _taxonomy_extraction;
+    private boolean                                      _replace_underscores;
+    public final static Pattern                          UC_LETTERS_NUMBERS_PATTERN        = Pattern
+                                                                                                   .compile( "^[A-Z0-9]+$" );
+    public final static Pattern                          NUMBERS_ONLY_PATTERN              = Pattern
+                                                                                                   .compile( "^[0-9]+$" );
+
+    public NHXParser() {
+        init();
+    }
+
+    /**
+     * Decreases the clade level by one.
+     * 
+     * @throws PhylogenyParserException
+     *             if level goes below zero.
+     */
+    private void decreaseCladeLevel() throws PhylogenyParserException {
+        if ( getCladeLevel() < 0 ) {
+            throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of close parens is larger than number of open parens" );
+        }
+        --_clade_level;
+    }
+
+    /**
+     * Finishes the current Phylogeny and adds it to the list of Phylogenies
+     * created.
+     * 
+     * @throws PhylogenyParserException
+     * @throws NHXFormatException
+     */
+    private void finishPhylogeny() throws PhylogenyParserException, NHXFormatException {
+        setCladeLevel( 0 );
+        if ( getCurrentPhylogeny() != null ) {
+            parseNHX( getCurrentAnotation().toString(),
+                      getCurrentPhylogeny().getRoot(),
+                      getTaxonomyExtraction(),
+                      isReplaceUnderscores() );
+            if ( NHXParser.GUESS_IF_SUPPORT_VALUES ) {
+                if ( NHXParser.isBranchLengthsLikeBootstrapValues( getCurrentPhylogeny() ) ) {
+                    NHXParser.moveBranchLengthsToBootstrapValues( getCurrentPhylogeny() );
+                }
+            }
+            if ( isGuessRootedness() ) {
+                final PhylogenyNode root = getCurrentPhylogeny().getRoot();
+                if ( ( root.getDistanceToParent() >= 0.0 ) || !ForesterUtil.isEmpty( root.getName() )
+                        || !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( root ) ) || root.isHasAssignedEvent() ) {
+                    getCurrentPhylogeny().setRooted( true );
+                }
+            }
+            getPhylogenies().add( getCurrentPhylogeny() );
+        }
+    }
+
+    private void finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException {
+        setCladeLevel( 0 );
+        final PhylogenyNode new_node = new PhylogenyNode();
+        parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() );
+        setCurrentPhylogeny( new Phylogeny() );
+        getCurrentPhylogeny().setRoot( new_node );
+        getPhylogenies().add( getCurrentPhylogeny() );
+    }
+
+    private int getCladeLevel() {
+        return _clade_level;
+    }
+
+    private StringBuilder getCurrentAnotation() {
+        return _current_anotation;
+    }
+
+    private PhylogenyNode getCurrentNode() {
+        return _current_node;
+    }
+
+    private Phylogeny getCurrentPhylogeny() {
+        return _current_phylogeny;
+    }
+
+    private byte getInputType() {
+        return _input_type;
+    }
+
+    private Object getNhxSource() {
+        return _nhx_source;
+    }
+
+    private List<Phylogeny> getPhylogenies() {
+        return _phylogenies;
+    }
+
+    /**
+     * Returns the Phylogenies created as Array.
+     * 
+     * @return the Phylogenies created as Array
+     */
+    private Phylogeny[] getPhylogeniesAsArray() {
+        final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ];
+        for( int i = 0; i < getPhylogenies().size(); ++i ) {
+            p[ i ] = getPhylogenies().get( i );
+        }
+        return p;
+    }
+
+    private int getSourceLength() {
+        return _source_length;
+    }
+
+    public ForesterUtil.TAXONOMY_EXTRACTION getTaxonomyExtraction() {
+        return _taxonomy_extraction;
+    }
+
+    public boolean hasNext() {
+        return _has_next;
+    }
+
+    /**
+     * Increases the clade level by one.
+     */
+    private void increaseCladeLevel() {
+        ++_clade_level;
+    }
+
+    private void init() {
+        setTaxonomyExtraction( TAXONOMY_EXTRACTION_DEFAULT );
+        setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT );
+        setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT );
+        setIgnoreQuotes( IGNORE_QUOTES_DEFAULT );
+        setHasNext( false );
+    }
+
+    private boolean isGuessRootedness() {
+        return _guess_rootedness;
+    }
+
+    private boolean isIgnoreQuotes() {
+        return _ignore_quotes;
+    }
+
+    private boolean isReplaceUnderscores() {
+        return _replace_underscores;
+    }
+
+    private boolean isSawClosingParen() {
+        return _saw_closing_paren;
+    }
+
+    /**
+     * Replaces the current annotation with a new StringBuffer.
+     */
+    private void newCurrentAnotation() {
+        setCurrentAnotation( new StringBuilder() );
+    }
+
+    /**
+     * Parses the source set with setSource( final Object nhx_source ). Returns
+     * the Phylogenies found in the source as Phylogeny[].
+     * Everything between '[' and ']' is considered a comment and ignored,
+     * unless it is an NHX tag ("[&&NHX ... ]") or a bracketed value directly
+     * following a branch length (e.g. ":0.1[90]"); a digits-only bracketed
+     * value in that position is interpreted as a support value.
+     * 
+     * @see #setSource( final Object nhx_source )
+     * @see org.forester.io.parsers.PhylogenyParser#parse()
+     * @return Phylogeny[]
+     * @throws IOException
+     * @throws NHXFormatException
+     * @throws PhylogenyParserException
+     */
+    public Phylogeny[] parse() throws IOException, NHXFormatException {
+        setHasNext( false );
+        boolean in_comment = false;
+        boolean saw_colon = false;
+        boolean saw_open_bracket = false;
+        boolean in_double_quote = false;
+        boolean in_single_quote = false;
+        setPhylogenies( new ArrayList<Phylogeny>() );
+        setCladeLevel( 0 );
+        newCurrentAnotation();
+        int i = 0;
+        while ( true ) {
+            char c = '\b';
+            if ( getInputType() == NHXParser.BUFFERED_READER ) {
+                final int ci = ( ( BufferedReader ) getNhxSource() ).read();
+                if ( ci >= 0 ) {
+                    c = ( char ) ci;
+                }
+                else {
+                    break;
+                }
+            }
+            else {
+                if ( i >= getSourceLength() ) {
+                    break;
+                }
+                else {
+                    switch ( getInputType() ) {
+                        case STRING:
+                            c = ( ( String ) getNhxSource() ).charAt( i );
+                            break;
+                        case STRING_BUFFER:
+                            c = ( ( StringBuffer ) getNhxSource() ).charAt( i );
+                            break;
+                        case CHAR_ARRAY:
+                            c = ( ( char[] ) getNhxSource() )[ i ];
+                            break;
+                    }
+                }
+            }
+            if ( !in_single_quote && !in_double_quote ) {
+                if ( c == ':' ) {
+                    saw_colon = true;
+                }
+                else if ( !( ( c < 33 ) || ( c > 126 ) ) && saw_colon
+                        && ( ( c != '[' ) && ( c != '.' ) && ( ( c < 48 ) || ( c > 57 ) ) ) ) {
+                    saw_colon = false;
+                }
+            }
+            // Control characters and non-ASCII are always ignored; the quote
+            // characters " (34) and ' (39) are ignored only when quotes are to
+            // be ignored, and ';' is ignored at clade level 0:
+            if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) )
+                    || ( !isIgnoreQuotes() && ( ( c < 32 ) || ( c > 126 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) ) ) {
+                // Do nothing.
+            }
+            else if ( ( c == 32 ) && ( !in_single_quote && !in_double_quote ) ) {
+                // Do nothing.
+            }
+            else if ( in_comment ) {
+                if ( c == ']' ) {
+                    in_comment = false;
+                }
+            }
+            else if ( in_double_quote ) {
+                if ( c == '"' ) {
+                    in_double_quote = false;
+                }
+                else {
+                    getCurrentAnotation().append( c );
+                }
+            }
+            else if ( c == '"' ) {
+                in_double_quote = true;
+            }
+            else if ( in_single_quote ) {
+                if ( c == 39 ) {
+                    in_single_quote = false;
+                }
+                else {
+                    getCurrentAnotation().append( c );
+                }
+            }
+            else if ( c == 39 ) {
+                in_single_quote = true;
+            }
+            else if ( c == '[' ) {
+                saw_open_bracket = true;
+            }
+            else if ( saw_open_bracket ) {
+                if ( c != ']' ) {
+                    // everything not starting with "[&" is considered a comment,
+                    // unless it directly follows a branch length (e.g. ":0.1[90]"):
+                    if ( c == '&' ) {
+                        getCurrentAnotation().append( "[&" );
+                    }
+                    else if ( saw_colon ) {
+                        getCurrentAnotation().append( "[" + c );
+                    }
+                    else {
+                        in_comment = true;
+                    }
+                }
+                // comment consisting just of "[]":
+                saw_open_bracket = false;
+            }
+            else if ( c == '(' ) {
+                processOpenParen();
+            }
+            else if ( c == ')' ) {
+                processCloseParen();
+            }
+            else if ( c == ',' ) {
+                processComma();
+            }
+            else {
+                getCurrentAnotation().append( c );
+            }
+            ++i;
+        }
+        if ( getCladeLevel() != 0 ) {
+            setPhylogenies( null );
+            throw new PhylogenyParserException( "error in NH (Newick)/NHX formatted data: most likely cause: number of open parens does not equal number of close parens" );
+        }
+        if ( getCurrentPhylogeny() != null ) {
+            finishPhylogeny();
+        }
+        else if ( getCurrentAnotation().length() > 0 ) {
+            finishSingleNodePhylogeny();
+        }
+        else if ( getPhylogenies().size() < 1 ) {
+            getPhylogenies().add( new Phylogeny() );
+        }
+        return getPhylogeniesAsArray();
+    } // parse()
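+    // A minimal usage sketch for parse() (the tree string is illustrative only):
+    //
+    //   final NHXParser parser = new NHXParser();
+    //   parser.setSource( "((A:0.1,B:0.2)ab:0.3,C:0.4)r;" );
+    //   final Phylogeny[] phylogenies = parser.parse();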
+
+    public Phylogeny parseNext() throws IOException, NHXFormatException {
+        return null;
+    }
+
+    /**
+     * Called if a closing paren is encountered.
+     * 
+     * @throws PhylogenyParserException
+     * @throws NHXFormatException
+     */
+    private void processCloseParen() throws PhylogenyParserException, NHXFormatException {
+        decreaseCladeLevel();
+        if ( !isSawClosingParen() ) {
+            final PhylogenyNode new_node = new PhylogenyNode();
+            parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() );
+            newCurrentAnotation();
+            getCurrentNode().addAsChild( new_node );
+        }
+        else {
+            parseNHX( getCurrentAnotation().toString(),
+                      getCurrentNode().getLastChildNode(),
+                      getTaxonomyExtraction(),
+                      isReplaceUnderscores() );
+            newCurrentAnotation();
+        }
+        if ( !getCurrentNode().isRoot() ) {
+            setCurrentNode( getCurrentNode().getParent() );
+        }
+        setSawClosingParen( true );
+    } // processCloseParen()
+
+    /**
+     * Called if a comma is encountered.
+     * 
+     * @throws PhylogenyParserException
+     * @throws NHXFormatException
+     */
+    private void processComma() throws PhylogenyParserException, NHXFormatException {
+        if ( !isSawClosingParen() ) {
+            final PhylogenyNode new_node = new PhylogenyNode();
+            parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() );
+            if ( getCurrentNode() == null ) {
+                throw new NHXFormatException( "format might not be NH or NHX" );
+            }
+            getCurrentNode().addAsChild( new_node );
+        }
+        else {
+            parseNHX( getCurrentAnotation().toString(),
+                      getCurrentNode().getLastChildNode(),
+                      getTaxonomyExtraction(),
+                      isReplaceUnderscores() );
+        }
+        newCurrentAnotation();
+        setSawClosingParen( false );
+    } // processComma()
+
+    /**
+     * Called if an opening paren is encountered.
+     * 
+     * @throws PhylogenyParserException
+     * @throws NHXFormatException
+     */
+    private void processOpenParen() throws PhylogenyParserException, NHXFormatException {
+        final PhylogenyNode new_node = new PhylogenyNode();
+        if ( getCladeLevel() == 0 ) {
+            if ( getCurrentPhylogeny() != null ) {
+                finishPhylogeny();
+            }
+            setCladeLevel( 1 );
+            newCurrentAnotation();
+            setCurrentPhylogeny( new Phylogeny() );
+            getCurrentPhylogeny().setRoot( new_node );
+        }
+        else {
+            increaseCladeLevel();
+            getCurrentNode().addAsChild( new_node );
+        }
+        setCurrentNode( new_node );
+        setSawClosingParen( false );
+    }
+
+    private void setCladeLevel( final int clade_level ) {
+        if ( clade_level < 0 ) {
+            throw new IllegalArgumentException( "Attempt to set clade level to a number smaller than zero." );
+        }
+        _clade_level = clade_level;
+    }
+
+    private void setCurrentAnotation( final StringBuilder current_anotation ) {
+        _current_anotation = current_anotation;
+    }
+
+    private void setCurrentNode( final PhylogenyNode current_node ) {
+        _current_node = current_node;
+    }
+
+    private void setCurrentPhylogeny( final Phylogeny current_phylogeny ) {
+        _current_phylogeny = current_phylogeny;
+    }
+
+    public void setGuessRootedness( final boolean guess_rootedness ) {
+        _guess_rootedness = guess_rootedness;
+    }
+
+    private void setHasNext( final boolean has_next ) {
+        _has_next = has_next;
+    }
+
+    public void setIgnoreQuotes( final boolean ignore_quotes ) {
+        _ignore_quotes = ignore_quotes;
+    }
+
+    private void setInputType( final byte input_type ) {
+        _input_type = input_type;
+    }
+
+    private void setNhxSource( final Object nhx_source ) {
+        _nhx_source = nhx_source;
+    }
+
+    private void setPhylogenies( final ArrayList<Phylogeny> phylogenies ) {
+        _phylogenies = phylogenies;
+    }
+
+    public void setReplaceUnderscores( final boolean replace_underscores ) {
+        _replace_underscores = replace_underscores;
+    }
+
+    private void setSawClosingParen( final boolean saw_closing_paren ) {
+        _saw_closing_paren = saw_closing_paren;
+    }
+
+    /**
+     * This sets the source to be parsed. The source can be: String,
+     * StringBuffer, char[], File, or InputStream. The source can contain more
+     * than one phylogenies in either New Hamphshire (NH) or New Hamphshire
+     * Extended (NHX) format. There is no need to separate phylogenies with any
+     * special character. White space is always ignored, as are semicolons
+     * inbetween phylogenies. Example of a source describing two phylogenies
+     * (source is a String, in this example): "(A,(B,(C,(D,E)de)cde)bcde)abcde
+     * ((((A,B)ab,C)abc,D)abcd,E)abcde". Everything between a '[' followed by any
+     * character other than '&' and ']' is considered a comment and ignored
+     * (example: "[this is a comment]"). NHX tags are surrounded by '[&&NHX' and
+     * ']' (example: "[&&NHX:S=Varanus_storri]"). A sequence like "[& some
+     * info]" is ignored, too (at the PhylogenyNode level, though).
+     * Exception: numbers only between [ and ] (e.g. [90]) are interpreted as support values.
+     * 
+     * @see #parse()
+     * @see org.forester.io.parsers.PhylogenyParser#setSource(java.lang.Object)
+     * @param nhx_source
+     *            the source to be parsed (String, StringBuffer, char[], File,
+     *            or InputStream)
+     * @throws IOException
+     * @throws PhylogenyParserException
+     */
+    public void setSource( final Object nhx_source ) throws PhylogenyParserException, IOException {
+        if ( nhx_source == null ) {
+            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
+        }
+        else if ( nhx_source instanceof String ) {
+            setInputType( NHXParser.STRING );
+            setSourceLength( ( ( String ) nhx_source ).length() );
+            setNhxSource( nhx_source );
+        }
+        else if ( nhx_source instanceof StringBuffer ) {
+            setInputType( NHXParser.STRING_BUFFER );
+            setSourceLength( ( ( StringBuffer ) nhx_source ).length() );
+            setNhxSource( nhx_source );
+        }
+        else if ( nhx_source instanceof char[] ) {
+            setInputType( NHXParser.CHAR_ARRAY );
+            setSourceLength( ( ( char[] ) nhx_source ).length );
+            setNhxSource( nhx_source );
+        }
+        else if ( nhx_source instanceof File ) {
+            setInputType( NHXParser.BUFFERED_READER );
+            setSourceLength( 0 );
+            final File f = ( File ) nhx_source;
+            final String error = ForesterUtil.isReadableFile( f );
+            if ( !ForesterUtil.isEmpty( error ) ) {
+                throw new PhylogenyParserException( error );
+            }
+            setNhxSource( new BufferedReader( new FileReader( f ) ) );
+        }
+        else if ( nhx_source instanceof InputStream ) {
+            setInputType( NHXParser.BUFFERED_READER );
+            setSourceLength( 0 );
+            final InputStreamReader isr = new InputStreamReader( ( InputStream ) nhx_source );
+            setNhxSource( new BufferedReader( isr ) );
+        }
+        else {
+            throw new IllegalArgumentException( getClass() + " can only parse objects of type String,"
+                    + " StringBuffer, char[], File," + " or InputStream " + " [attempt to parse object of "
+                    + nhx_source.getClass() + "]." );
+        }
+        setHasNext( true );
+    }
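+    // Examples of accepted sources (the file name and variable names are
+    // hypothetical and shown only to illustrate the supported types):
+    //
+    //   parser.setSource( "((A,B),C);" );              // String, parsed in memory
+    //   parser.setSource( new File( "trees.nhx" ) );   // File, read via a BufferedReader
+    //   parser.setSource( some_input_stream );         // InputStream, read via a BufferedReader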
+
+    private void setSourceLength( final int source_length ) {
+        _source_length = source_length;
+    }
+
+    public void setTaxonomyExtraction( final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction ) {
+        _taxonomy_extraction = taxonomy_extraction;
+    }
+
+    private static double doubleValue( final String str ) throws NHXFormatException {
+        try {
+            return Double.valueOf( str ).doubleValue();
+        }
+        catch ( final NumberFormatException ex ) {
+            throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from \"" + str
+                    + "\"" );
+        }
+    }
+
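+    /**
+     * Heuristic used when GUESS_IF_SUPPORT_VALUES is set: returns true only if
+     * the phylogeny has more than one external node and all external nodes
+     * carry the same "distance to parent" of at least 10, in which case these
+     * values are treated as bootstrap values rather than branch lengths.
+     */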
+    private static boolean isBranchLengthsLikeBootstrapValues( final Phylogeny p ) {
+        final PhylogenyNodeIterator it = p.iteratorExternalForward();
+        final double d0 = it.next().getDistanceToParent();
+        if ( ( d0 < 10 ) || !it.hasNext() ) {
+            return false;
+        }
+        while ( it.hasNext() ) {
+            final double d = it.next().getDistanceToParent();
+            if ( ( d != d0 ) || ( d < 10 ) ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private static void moveBranchLengthsToBootstrapValues( final Phylogeny p ) {
+        final PhylogenyNodeIterator it = p.iteratorPostorder();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            PhylogenyMethods.setBootstrapConfidence( n, n.getDistanceToParent() );
+            n.setDistanceToParent( PhylogenyNode.DISTANCE_DEFAULT );
+        }
+    }
+
+    public static void parseNHX( String s,
+                                 final PhylogenyNode node_to_annotate,
+                                 final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction,
+                                 final boolean replace_underscores ) throws NHXFormatException {
+        if ( ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
+            throw new IllegalArgumentException( "cannot extract taxonomies and replace underscores at the same time" );
+        }
+        if ( ( s != null ) && ( s.length() > 0 ) ) {
+            if ( replace_underscores ) {
+                s = s.replaceAll( "_+", " " );
+            }
+            int ob = 0;
+            int cb = 0;
+            String a = "";
+            String b = "";
+            StringTokenizer t = null;
+            boolean is_nhx = false;
+            ob = s.indexOf( "[" );
+            cb = s.indexOf( "]" );
+            if ( ob > -1 ) {
+                a = "";
+                b = "";
+                is_nhx = true;
+                if ( cb < 0 ) {
+                    throw new NHXFormatException( "error in NHX formatted data: no closing \"]\"" );
+                }
+                if ( s.indexOf( "&&NHX" ) == ( ob + 1 ) ) {
+                    b = s.substring( ob + 6, cb );
+                }
+                else {
+                    // No &&NHX and digits only: is likely to be a support value.
+                    final String bracketed = s.substring( ob + 1, cb );
+                    final Matcher numbers_only = NUMBERS_ONLY_PATTERN.matcher( bracketed );
+                    if ( numbers_only.matches() ) {
+                        b = ":" + NHXtags.SUPPORT + bracketed;
+                    }
+                }
+                a = s.substring( 0, ob );
+                s = a + b;
+                if ( ( s.indexOf( "[" ) > -1 ) || ( s.indexOf( "]" ) > -1 ) ) {
+                    throw new NHXFormatException( "error in NHX formatted data: more than one \"]\" or \"[\"" );
+                }
+            }
+            t = new StringTokenizer( s, ":" );
+            if ( t.countTokens() >= 1 ) {
+                if ( !s.startsWith( ":" ) ) {
+                    node_to_annotate.setName( t.nextToken() );
+                    if ( !replace_underscores
+                            && ( !is_nhx && ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.NO ) ) ) {
+                        final String tax = ForesterUtil
+                                .extractTaxonomyCodeFromNodeName( node_to_annotate.getName(),
+                                                                  LIMIT_SPECIES_NAMES_TO_FIVE_CHARS,
+                                                                  taxonomy_extraction );
+                        if ( !ForesterUtil.isEmpty( tax ) ) {
+                            if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
+                                node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
+                            }
+                            node_to_annotate.getNodeData().getTaxonomy().setTaxonomyCode( tax );
+                        }
+                    }
+                }
+                while ( t.hasMoreTokens() ) {
+                    s = t.nextToken();
+                    if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.SPECIES_NAME ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
+                            node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
+                        }
+                        node_to_annotate.getNodeData().getTaxonomy().setScientificName( s.substring( 2 ) );
+                    }
+                    else if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.ANNOTATION ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasSequence() ) {
+                            node_to_annotate.getNodeData().setSequence( new Sequence() );
+                        }
+                        final Annotation annotation = new Annotation( "_:_" );
+                        annotation.setDesc( s.substring( 3 ) );
+                        node_to_annotate.getNodeData().getSequence().addAnnotation( annotation );
+                    }
+                    else if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.IS_DUPLICATION ) ) {
+                        if ( ( s.charAt( 2 ) == 'Y' ) || ( s.charAt( 2 ) == 'T' ) ) {
+                            node_to_annotate.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
+                        }
+                        else if ( ( s.charAt( 2 ) == 'N' ) || ( s.charAt( 2 ) == 'F' ) ) {
+                            node_to_annotate.getNodeData().setEvent( Event.createSingleSpeciationEvent() );
+                        }
+                        else if ( s.charAt( 2 ) == '?' ) {
+                            node_to_annotate.getNodeData().setEvent( Event.createSingleSpeciationOrDuplicationEvent() );
+                        }
+                        else {
+                            throw new NHXFormatException( "error in NHX formatted data: expected :D=Y, :D=N, or :D=? (T and F are also accepted)" );
+                        }
+                    }
+                    else if ( s.startsWith( NHXtags.SUPPORT ) ) {
+                        PhylogenyMethods.setConfidence( node_to_annotate, doubleValue( s.substring( 2 ) ) );
+                    }
+                    else if ( s.startsWith( NHXtags.TAXONOMY_ID ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
+                            node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
+                        }
+                        node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) );
+                    }
+                    else if ( s.startsWith( NHXtags.PARENT_BRANCH_WIDTH ) ) {
+                        PhylogenyMethods.setBranchWidthValue( node_to_annotate, Integer.parseInt( s.substring( 2 ) ) );
+                    }
+                    else if ( s.startsWith( NHXtags.COLOR ) ) {
+                        final Color c = NHXParser.stringToColor( s.substring( 2 ) );
+                        if ( c != null ) {
+                            PhylogenyMethods.setBranchColorValue( node_to_annotate, c );
+                        }
+                    }
+                    else if ( s.startsWith( NHXtags.CUSTOM_DATA_ON_NODE ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasProperties() ) {
+                            node_to_annotate.getNodeData().setProperties( new PropertiesMap() );
+                        }
+                        node_to_annotate.getNodeData().getProperties().addProperty( Property.createFromNhxString( s ) );
+                    }
+                    else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasSequence() ) {
+                            node_to_annotate.getNodeData().setSequence( new Sequence() );
+                        }
+                        node_to_annotate.getNodeData().getSequence().setDomainArchitecture( new DomainArchitecture( s
+                                .substring( 3 ) ) );
+                    }
+                    else if ( s.startsWith( NHXtags.NODE_IDENTIFIER ) ) {
+                        node_to_annotate.getNodeData().setNodeIdentifier( new Identifier( s.substring( 3 ) ) );
+                    }
+                    else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasSequence() ) {
+                            node_to_annotate.getNodeData().setSequence( new Sequence() );
+                        }
+                        node_to_annotate.getNodeData().getSequence()
+                                .setAccession( new Accession( s.substring( 3 ), "?" ) );
+                    }
+                    else if ( s.startsWith( NHXtags.GENE_NAME ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasSequence() ) {
+                            node_to_annotate.getNodeData().setSequence( new Sequence() );
+                        }
+                        node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) );
+                    }
+                    else if ( s.startsWith( NHXtags.GENE_NAME_SYNONYM ) ) {
+                        if ( !node_to_annotate.getNodeData().isHasSequence() ) {
+                            node_to_annotate.getNodeData().setSequence( new Sequence() );
+                        }
+                        node_to_annotate.getNodeData().getSequence().setName( s.substring( 2 ) );
+                    }
+                    else if ( s.indexOf( '=' ) < 0 ) {
+                        if ( node_to_annotate.getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+                            throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:"
+                                    + "\"" + s + "\"" );
+                        }
+                        node_to_annotate.setDistanceToParent( doubleValue( s ) );
+                    }
+                } // while ( t.hasMoreTokens() ) 
+            }
+        }
+    }
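+    // Illustrative call to parseNHX() (node name, branch length, and NHX values
+    // below are made up for the example):
+    //
+    //   parseNHX( "ADH1:0.23[&&NHX:S=Homo_sapiens:D=N]",
+    //             node,
+    //             ForesterUtil.TAXONOMY_EXTRACTION.NO,
+    //             false );
+    //
+    // This sets the node name to "ADH1", the distance to parent to 0.23, the
+    // scientific name to "Homo_sapiens", and marks the node as a speciation.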
+
+    /**
+     * Parses String s in the format r.g.b (e.g. "12.34.234" ) into red, green,
+     * and blue and returns the corresponding Color.
+     */
+    private static Color stringToColor( final String s ) {
+        final StringTokenizer st = new StringTokenizer( s, "." );
+        if ( st.countTokens() != 3 ) {
+            throw new IllegalArgumentException( "illegal format for color: " + s );
+        }
+        final int red = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) );
+        final int green = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) );
+        final int blue = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) );
+        return new Color( red, green, blue );
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXtags.java b/forester/java/src/org/forester/io/parsers/nhx/NHXtags.java
new file mode 100644 (file)
index 0000000..ae884ab
--- /dev/null
@@ -0,0 +1,55 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.nhx;
+
+public final class NHXtags {
+
+    public static final String CUSTOM_DATA_ON_NODE        = "XN=";
+    public static final String COLOR                      = "C=";
+    public static final String PARENT_BRANCH_WIDTH        = "W=";
+    public static final String SUBTREE_NEIGHBORS          = "SNn=";
+    public static final String SUPER_ORTHOLOGOUS          = "SOn=";
+    public static final String ORTHOLOGOUS                = "On=";
+    public static final String TAXONOMY_ID                = "T=";
+    public static final String SUPPORT                    = "B=";
+    public static final String IS_DUPLICATION             = "D=";
+    public static final String ANNOTATION                 = "AN="; //TODO fix on website NHXv2
+    public static final String SPECIES_NAME               = "S=";
+    public static final String DOMAIN_STRUCTURE           = "DS=";
+    public static final String GENE_NAME                  = "GN=";
+    public static final String GENE_NAME_SYNONYM          = "G=";
+    public static final String SEQUENCE_ACCESSION         = "AC=";
+    public static final String NODE_IDENTIFIER            = "ID="; //TODO fix on website NHXv2
+    public static final Object BRANCH_WIDTH               = "W=";
+    @Deprecated
+    public static final String BINARY_DOMAIN_COMBINATIONS = "GDC=";
+    @Deprecated
+    public static final String DOMAINS_SEPARATOR          = "\\|";
+    @Deprecated
+    public static final String DOMAINS                    = "GD=";
+    @Deprecated
+    public static final String EC_NUMBER                  = "E=";
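+    // Example of an NHX block using some of these tags (values are illustrative):
+    //   [&&NHX:S=Homo_sapiens:T=9606:D=N:B=95]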
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlDataFormatException.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlDataFormatException.java
new file mode 100644 (file)
index 0000000..bb80183
--- /dev/null
@@ -0,0 +1,40 @@
+// $Id:
+// $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+public class PhyloXmlDataFormatException extends PhyloXmlException {
+
+    private static final long serialVersionUID = 3756209394438250170L;
+
+    public PhyloXmlDataFormatException() {
+        super();
+    }
+
+    public PhyloXmlDataFormatException( final String message ) {
+        super( message );
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlException.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlException.java
new file mode 100644 (file)
index 0000000..6c82fa1
--- /dev/null
@@ -0,0 +1,39 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+public class PhyloXmlException extends RuntimeException {
+
+    private static final long serialVersionUID = 3756209394438250170L;
+
+    public PhyloXmlException() {
+        super();
+    }
+
+    public PhyloXmlException( final String message ) {
+        super( message );
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlHandler.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlHandler.java
new file mode 100644 (file)
index 0000000..6c013f6
--- /dev/null
@@ -0,0 +1,454 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.forester.io.parsers.phyloxml.data.BinaryCharactersParser;
+import org.forester.io.parsers.phyloxml.data.BranchWidthParser;
+import org.forester.io.parsers.phyloxml.data.ColorParser;
+import org.forester.io.parsers.phyloxml.data.ConfidenceParser;
+import org.forester.io.parsers.phyloxml.data.DateParser;
+import org.forester.io.parsers.phyloxml.data.DistributionParser;
+import org.forester.io.parsers.phyloxml.data.EventParser;
+import org.forester.io.parsers.phyloxml.data.IdentifierParser;
+import org.forester.io.parsers.phyloxml.data.PropertyParser;
+import org.forester.io.parsers.phyloxml.data.ReferenceParser;
+import org.forester.io.parsers.phyloxml.data.SequenceParser;
+import org.forester.io.parsers.phyloxml.data.SequenceRelationParser;
+import org.forester.io.parsers.phyloxml.data.TaxonomyParser;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.BinaryCharacters;
+import org.forester.phylogeny.data.BranchColor;
+import org.forester.phylogeny.data.BranchWidth;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.Date;
+import org.forester.phylogeny.data.Distribution;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.PropertiesMap;
+import org.forester.phylogeny.data.Property;
+import org.forester.phylogeny.data.Reference;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.SequenceRelation;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE;
+import org.forester.util.FailedConditionCheckException;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public final class PhyloXmlHandler extends DefaultHandler {
+
+    private static final String                              PHYLOXML               = "phyloxml";
+    private String                                           _current_element_name;
+    private Phylogeny                                        _current_phylogeny;
+    private List<Phylogeny>                                  _phylogenies;
+    private XmlElement                                       _current_xml_element;
+    private PhylogenyNode                                    _current_node;
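+    // Holds, for each phylogeny currently being parsed, its sequences keyed by
+    // id; entries are read and removed again in finishPhylogeny().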
+    private static Map<Phylogeny, HashMap<String, Sequence>> phylogenySequencesById = new HashMap<Phylogeny, HashMap<String, Sequence>>();
+
+    PhyloXmlHandler() {
+        // Constructor.
+    }
+
+    private void addNode() {
+        final PhylogenyNode new_node = new PhylogenyNode();
+        getCurrentNode().addAsChild( new_node );
+        setCurrentNode( new_node );
+    }
+
+    @Override
+    public void characters( final char[] chars, final int start_index, final int length ) {
+        if ( ( ( getCurrentXmlElement() != null ) && ( getCurrentElementName() != null ) )
+                && !getCurrentElementName().equals( PhyloXmlMapping.CLADE )
+                && !getCurrentElementName().equals( PhyloXmlMapping.PHYLOGENY ) ) {
+            if ( !ForesterUtil.isEmpty( getCurrentXmlElement().getValueAsString() ) ) {
+                getCurrentXmlElement().appendValue( new String( chars, start_index, length ) );
+            }
+            else {
+                getCurrentXmlElement().setValue( new String( chars, start_index, length ) );
+            }
+        }
+    }
+
+    @Override
+    public void endElement( final String namespace_uri, final String local_name, final String qualified_name )
+            throws SAXException {
+        if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) {
+            if ( local_name.equals( PhyloXmlMapping.CLADE ) ) {
+                try {
+                    mapElementToPhylogenyNode( getCurrentXmlElement(), getCurrentNode() );
+                    if ( !getCurrentNode().isRoot() ) {
+                        setCurrentNode( getCurrentNode().getParent() );
+                    }
+                    getCurrentXmlElement().setValue( null );
+                    setCurrentXmlElement( getCurrentXmlElement().getParent() );
+                }
+                catch ( final PhylogenyParserException ex ) {
+                    throw new SAXException( ex.getMessage() );
+                }
+            }
+            else if ( local_name.equals( PhyloXmlMapping.SEQUENCE_RELATION ) ) {
+                try {
+                    if ( getCurrentPhylogeny() != null ) {
+                        final SequenceRelation seqRelation = ( SequenceRelation ) SequenceRelationParser
+                                .getInstance( getCurrentPhylogeny() ).parse( getCurrentXmlElement() );
+                        final Map<String, Sequence> sequencesById = getSequenceMapByIdForPhylogeny( getCurrentPhylogeny() );
+                        final Sequence ref0 = sequencesById.get( seqRelation.getRef0().getSourceId() ), ref1 = sequencesById
+                                .get( seqRelation.getRef1().getSourceId() );
+                        if ( ref0 != null ) {
+                            // check for reverse relation
+                            boolean fFoundReverse = false;
+                            for( final SequenceRelation sr : ref0.getSequenceRelations() ) {
+                                if ( sr.getType().equals( seqRelation.getType() )
+                                        && ( ( sr.getRef0().isEqual( ref1 ) && sr.getRef1().isEqual( ref0 ) ) || ( sr
+                                                .getRef0().isEqual( ref0 ) && sr.getRef1().isEqual( ref1 ) ) ) ) {
+                                    // in this case we don't need to re-add it, but we make sure we don't lose the confidence value
+                                    fFoundReverse = true;
+                                    if ( ( sr.getConfidence() == null ) && ( seqRelation.getConfidence() != null ) ) {
+                                        sr.setConfidence( seqRelation.getConfidence() );
+                                    }
+                                }
+                            }
+                            if ( !fFoundReverse ) {
+                                ref0.addSequenceRelation( seqRelation );
+                            }
+                        }
+                        if ( ref1 != null ) {
+                            // check for reverse relation
+                            boolean fFoundReverse = false;
+                            for( final SequenceRelation sr : ref1.getSequenceRelations() ) {
+                                if ( sr.getType().equals( seqRelation.getType() )
+                                        && ( ( sr.getRef0().isEqual( ref1 ) && sr.getRef1().isEqual( ref0 ) ) || ( sr
+                                                .getRef0().isEqual( ref0 ) && sr.getRef1().isEqual( ref1 ) ) ) ) {
+                                    // in this case we don't need to re-add it, but we make sure we don't lose the confidence value
+                                    fFoundReverse = true;
+                                    if ( ( sr.getConfidence() == null ) && ( seqRelation.getConfidence() != null ) ) {
+                                        sr.setConfidence( seqRelation.getConfidence() );
+                                    }
+                                }
+                            }
+                            if ( !fFoundReverse ) {
+                                ref1.addSequenceRelation( seqRelation );
+                            }
+                        }
+                        // we add the type to the current phylogeny so we can know it needs to be displayed in the combo
+                        final Collection<SEQUENCE_RELATION_TYPE> relationTypesForCurrentPhylogeny = getCurrentPhylogeny()
+                                .getRelevantSequenceRelationTypes();
+                        if ( !relationTypesForCurrentPhylogeny.contains( seqRelation.getType() ) ) {
+                            relationTypesForCurrentPhylogeny.add( seqRelation.getType() );
+                        }
+                    }
+                }
+                catch ( final PhylogenyParserException ex ) {
+                    throw new SAXException( ex.getMessage() );
+                }
+            }
+            else if ( local_name.equals( PhyloXmlMapping.PHYLOGENY ) ) {
+                try {
+                    PhyloXmlHandler.mapElementToPhylogeny( getCurrentXmlElement(), getCurrentPhylogeny() );
+                }
+                catch ( final PhylogenyParserException ex ) {
+                    throw new SAXException( ex.getMessage() );
+                }
+                finishPhylogeny();
+                reset();
+            }
+            else if ( local_name.equals( PHYLOXML ) ) {
+                // Do nothing.
+            }
+            else if ( ( getCurrentPhylogeny() != null ) && ( getCurrentXmlElement().getParent() != null ) ) {
+                setCurrentXmlElement( getCurrentXmlElement().getParent() );
+            }
+            setCurrentElementName( null );
+        }
+    }
+
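+    // Called when a phylogeny element ends: recalculates the number of external descendants,
+    // stores the finished phylogeny, and attaches any sequences collected by source id as
+    // sequence relation queries.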
+    private void finishPhylogeny() throws SAXException {
+        getCurrentPhylogeny().recalculateNumberOfExternalDescendants( false );
+        getPhylogenies().add( getCurrentPhylogeny() );
+        final HashMap<String, Sequence> phyloSequences = phylogenySequencesById.get( getCurrentPhylogeny() );
+        if ( phyloSequences != null ) {
+            getCurrentPhylogeny().setSequenceRelationQueries( phyloSequences.values() );
+            phylogenySequencesById.remove( getCurrentPhylogeny() );
+        }
+    }
+
+    private String getCurrentElementName() {
+        return _current_element_name;
+    }
+
+    private PhylogenyNode getCurrentNode() {
+        return _current_node;
+    }
+
+    private Phylogeny getCurrentPhylogeny() {
+        return _current_phylogeny;
+    }
+
+    private XmlElement getCurrentXmlElement() {
+        return _current_xml_element;
+    }
+
+    List<Phylogeny> getPhylogenies() {
+        return _phylogenies;
+    }
+
+    private void init() {
+        reset();
+        setPhylogenies( new ArrayList<Phylogeny>() );
+    }
+
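+    // Creates the root node of the current phylogeny; calling this when a current node already
+    // exists, or when no phylogeny is being built, is an error.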
+    private void initCurrentNode() {
+        if ( getCurrentNode() != null ) {
+            throw new FailedConditionCheckException( "attempt to create new current node when current node already exists" );
+        }
+        if ( getCurrentPhylogeny() == null ) {
+            throw new FailedConditionCheckException( "attempt to create new current node for non-existing phylogeny" );
+        }
+        final PhylogenyNode node = new PhylogenyNode();
+        getCurrentPhylogeny().setRoot( node );
+        setCurrentNode( getCurrentPhylogeny().getRoot() );
+    }
+
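+    // Maps a clade XmlElement onto a PhylogenyNode: the branch length may come from an attribute
+    // or a child element; all other node data (name, taxonomy, sequence, distribution, date,
+    // reference, binary characters, color, confidence, width, events, properties) is read via
+    // dedicated child element parsers.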
+    private void mapElementToPhylogenyNode( final XmlElement xml_element, final PhylogenyNode node )
+            throws PhylogenyParserException {
+        if ( xml_element.isHasAttribute( PhyloXmlMapping.BRANCH_LENGTH ) ) {
+            double d = 0;
+            try {
+                d = Double.parseDouble( xml_element.getAttribute( PhyloXmlMapping.BRANCH_LENGTH ) );
+            }
+            catch ( final NumberFormatException e ) {
+                throw new PhylogenyParserException( "ill-formatted distance in clade attribute ["
+                        + xml_element.getAttribute( PhyloXmlMapping.BRANCH_LENGTH ) + "]: " + e.getMessage() );
+            }
+            node.setDistanceToParent( d );
+        }
+        for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) {
+            final XmlElement element = xml_element.getChildElement( i );
+            final String qualified_name = element.getQualifiedName();
+            if ( qualified_name.equals( PhyloXmlMapping.BRANCH_LENGTH ) ) {
+                if ( node.getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+                    throw new PhylogenyParserException( "ill-advised attempt to set distance twice for the same clade (probably via element and via attribute)" );
+                }
+                node.setDistanceToParent( element.getValueAsDouble() );
+            }
+            if ( qualified_name.equals( PhyloXmlMapping.NODE_NAME ) ) {
+                node.setName( element.getValueAsString() );
+            }
+            //  else if ( qualified_name.equals( PhyloXmlMapping.NODE_IDENTIFIER ) ) {
+            //      node.getNodeData().setNodeIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( element ) );
+            //  }
+            else if ( qualified_name.equals( PhyloXmlMapping.TAXONOMY ) ) {
+                node.getNodeData().addTaxonomy( ( Taxonomy ) TaxonomyParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.SEQUENCE ) ) {
+                final Sequence sequence = ( Sequence ) SequenceParser.getInstance().parse( element );
+                node.getNodeData().addSequence( sequence );
+                // we temporarily store all sequences that have a source ID so we can access them easily when we need to attach relations to them
+                final String sourceId = sequence.getSourceId();
+                if ( ( getCurrentPhylogeny() != null ) && !ForesterUtil.isEmpty( sourceId ) ) {
+                    getSequenceMapByIdForPhylogeny( getCurrentPhylogeny() ).put( sourceId, sequence );
+                }
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.DISTRIBUTION ) ) {
+                node.getNodeData().addDistribution( ( Distribution ) DistributionParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.CLADE_DATE ) ) {
+                node.getNodeData().setDate( ( Date ) DateParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.REFERENCE ) ) {
+                node.getNodeData().addReference( ( Reference ) ReferenceParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.BINARY_CHARACTERS ) ) {
+                node.getNodeData().setBinaryCharacters( ( BinaryCharacters ) BinaryCharactersParser.getInstance()
+                        .parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.COLOR ) ) {
+                node.getBranchData().setBranchColor( ( BranchColor ) ColorParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) {
+                node.getBranchData().addConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.WIDTH ) ) {
+                node.getBranchData().setBranchWidth( ( BranchWidth ) BranchWidthParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.EVENTS ) ) {
+                node.getNodeData().setEvent( ( Event ) EventParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.PROPERTY ) ) {
+                if ( !node.getNodeData().isHasProperties() ) {
+                    node.getNodeData().setProperties( new PropertiesMap() );
+                }
+                node.getNodeData().getProperties().addProperty( ( Property ) PropertyParser.getInstance()
+                        .parse( element ) );
+            }
+        }
+    }
+
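+    // The first clade of a phylogeny becomes the root (initCurrentNode); every later clade is
+    // handed to addNode().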
+    private void newClade() {
+        if ( getCurrentNode() == null ) {
+            initCurrentNode();
+        }
+        else {
+            addNode();
+        }
+    }
+
+    private void newPhylogeny() {
+        setCurrentPhylogeny( new Phylogeny() );
+    }
+
+    private void reset() {
+        setCurrentPhylogeny( null );
+        setCurrentNode( null );
+        setCurrentElementName( null );
+        setCurrentXmlElement( null );
+    }
+
+    private void setCurrentElementName( final String element_name ) {
+        _current_element_name = element_name;
+    }
+
+    private void setCurrentNode( final PhylogenyNode current_node ) {
+        _current_node = current_node;
+    }
+
+    private void setCurrentPhylogeny( final Phylogeny phylogeny ) {
+        _current_phylogeny = phylogeny;
+    }
+
+    private void setCurrentXmlElement( final XmlElement element ) {
+        _current_xml_element = element;
+    }
+
+    private void setPhylogenies( final List<Phylogeny> phylogenies ) {
+        _phylogenies = phylogenies;
+    }
+
+    @Override
+    public void startDocument() throws SAXException {
+        init();
+    }
+
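+    // Builds up the XmlElement tree as elements are opened: "clade" and "phylogeny" get dedicated
+    // handling, any other element inside a phylogeny is simply attached as a child of the current
+    // element. Elements outside the phyloXML namespace are ignored.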
+    @Override
+    public void startElement( final String namespace_uri,
+                              final String local_name,
+                              final String qualified_name,
+                              final Attributes attributes ) throws SAXException {
+        if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) {
+            setCurrentElementName( local_name );
+            if ( local_name.equals( PhyloXmlMapping.CLADE ) ) {
+                final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
+                getCurrentXmlElement().addChildElement( element );
+                setCurrentXmlElement( element );
+                newClade();
+            }
+            else if ( local_name.equals( PhyloXmlMapping.PHYLOGENY ) ) {
+                setCurrentXmlElement( new XmlElement( "", "", "", null ) );
+                newPhylogeny();
+                final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
+                if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR ) ) {
+                    getCurrentPhylogeny().setRerootable( Boolean.parseBoolean( element
+                            .getAttribute( PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR ) ) );
+                }
+                if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR ) ) {
+                    getCurrentPhylogeny().setDistanceUnit( element
+                            .getAttribute( PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR ) );
+                }
+                if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR ) ) {
+                    getCurrentPhylogeny().setRooted( Boolean.parseBoolean( element
+                            .getAttribute( PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR ) ) );
+                }
+                if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_TYPE_ATTR ) ) {
+                    getCurrentPhylogeny().setType( ( element.getAttribute( PhyloXmlMapping.PHYLOGENY_TYPE_ATTR ) ) );
+                }
+            }
+            else if ( local_name.equals( PHYLOXML ) ) {
+            }
+            else if ( getCurrentPhylogeny() != null ) {
+                final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
+                getCurrentXmlElement().addChildElement( element );
+                setCurrentXmlElement( element );
+            }
+        }
+    }
+
+    public static boolean attributeEqualsValue( final XmlElement element,
+                                                final String attributeName,
+                                                final String attributeValue ) {
+        final String attr = element.getAttribute( attributeName );
+        return ( ( attr != null ) && attr.equals( attributeValue ) );
+    }
+
+    public static String getAtttributeValue( final XmlElement element, final String attributeName ) {
+        final String attr = element.getAttribute( attributeName );
+        if ( attr != null ) {
+            return attr;
+        }
+        else {
+            return "";
+        }
+    }
+
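+    // Lazily creates and caches the per-phylogeny map from sequence source id ("id_source") to
+    // Sequence, used to resolve sequence_relation references during parsing.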
+    static public Map<String, Sequence> getSequenceMapByIdForPhylogeny( final Phylogeny ph ) {
+        HashMap<String, Sequence> seqMap = phylogenySequencesById.get( ph );
+        if ( seqMap == null ) {
+            seqMap = new HashMap<String, Sequence>();
+            phylogenySequencesById.put( ph, seqMap );
+        }
+        return seqMap;
+    }
+
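+    // Copies phylogeny-level child elements (name, description, identifier, confidence) from the
+    // phylogeny XmlElement onto the Phylogeny object.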
+    private static void mapElementToPhylogeny( final XmlElement xml_element, final Phylogeny phylogeny )
+            throws PhylogenyParserException {
+        for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) {
+            final XmlElement element = xml_element.getChildElement( i );
+            final String qualified_name = element.getQualifiedName();
+            if ( qualified_name.equals( PhyloXmlMapping.PHYLOGENY_NAME ) ) {
+                phylogeny.setName( element.getValueAsString() );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.PHYLOGENY_DESCRIPTION ) ) {
+                phylogeny.setDescription( element.getValueAsString() );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.IDENTIFIER ) ) {
+                phylogeny.setIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( element ) );
+            }
+            else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) {
+                phylogeny.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( element ) );
+            }
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java
new file mode 100644 (file)
index 0000000..04a8cb6
--- /dev/null
@@ -0,0 +1,134 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+/*
+ * @author Christian Zmasek
+ */
+public final class PhyloXmlMapping {
+
+    public static final String PHYLOGENY                                           = "phylogeny";
+    public static final String PHYLOGENY_NAME                                      = "name";
+    public static final String PHYLOGENY_DESCRIPTION                               = "description";
+    public static final String PHYLOGENY_IS_REROOTABLE_ATTR                        = "rerootable";
+    public static final String PHYLOGENY_BRANCHLENGTH_UNIT_ATTR                    = "branch_length_unit";
+    public static final String PHYLOGENY_IS_ROOTED_ATTR                            = "rooted";
+    public static final String PHYLOGENY_TYPE_ATTR                                 = "type";
+    public static final String CLADE                                               = "clade";
+    public static final String NODE_NAME                                           = "name";
+    public static final String SEQUENCE                                            = "sequence";
+    public static final String SEQUENCE_NAME                                       = "name";
+    public static final String SEQUENCE_SYMBOL                                     = "symbol";
+    public static final String ACCESSION                                           = "accession";
+    public static final String ACCESSION_SOURCE_ATTR                               = "source";
+    public static final String SEQUENCE_LOCATION                                   = "location";
+    public static final String SEQUENCE_MOL_SEQ                                    = "mol_seq";
+    public static final String SEQUENCE_MOL_SEQ_ALIGNED_ATTR                       = "is_aligned";
+    public static final String ANNOTATION                                          = "annotation";
+    public static final String ANNOTATION_DESC                                     = "desc";
+    public static final String ANNOTATION_REF_ATTR                                 = "ref";
+    public static final String ANNOTATION_EVIDENCE_ATTR                            = "evidence";
+    public static final String ANNOTATION_TYPE_ATTR                                = "type";
+    public static final String TAXONOMY                                            = "taxonomy";
+    public static final String TAXONOMY_SCIENTIFIC_NAME                            = "scientific_name";
+    public static final String TAXONOMY_COMMON_NAME                                = "common_name";
+    public static final String TAXONOMY_CODE                                       = "code";
+    public static final String TAXONOMY_RANK                                       = "rank";
+    public static final String TAXONOMY_SYNONYM                                    = "synonym";
+    public static final String TAXONOMY_AUTHORITY                                  = "authority";
+    public static final String DISTRIBUTION                                        = "distribution";
+    public static final String BINARY_CHARACTERS                                   = "binary_characters";
+    public static final String BINARY_CHARACTERS_PRESENT                           = "present";
+    public static final String BINARY_CHARACTERS_GAINED                            = "gained";
+    public static final String BINARY_CHARACTERS_LOST                              = "lost";
+    public static final String BINARY_CHARACTERS_TYPE_ATTR                         = "type";
+    public static final String BINARY_CHARACTERS_PRESENT_COUNT_ATTR                = "present_count";
+    public static final String BINARY_CHARACTERS_GAINED_COUNT_ATTR                 = "gained_count";
+    public static final String BINARY_CHARACTERS_LOST_COUNT_ATTR                   = "lost_count";
+    public static final String BRANCH_LENGTH                                       = "branch_length";
+    public static final String CONFIDENCE                                          = "confidence";
+    public static final String CONFIDENCE_TYPE_ATTR                                = "type";
+    public static final String COLOR                                               = "color";
+    public static final String COLOR_RED                                           = "red";
+    public static final String COLOR_GREEN                                         = "green";
+    public static final String COLOR_BLUE                                          = "blue";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN                 = "domain";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM       = "from";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO         = "to";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence";
+    // public final static String NODE_IDENTIFIER                                     = "node_id";
+    public final static String IDENTIFIER                                          = "id";
+    public final static String IDENTIFIER_PROVIDER_ATTR                            = "provider";
+    public static final String URI                                                 = "uri";
+    public static final String WIDTH                                               = "width";
+    public final static String EVENTS                                              = "events";
+    public final static String EVENT_TYPE                                          = "type";
+    public final static String EVENT_DUPLICATIONS                                  = "duplications";
+    public final static String EVENT_SPECIATIONS                                   = "speciations";
+    public final static String EVENT_LOSSES                                        = "losses";
+    public final static String SEQUENCE_DOMAIN_ARCHITECURE                         = "domain_architecture";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH                 = "length";
+    public final static String SEQUENCE_TYPE                                       = "type";
+    public static final String BINARY_CHARACTER                                    = "bc";
+    public static final String URI_DESC_ATTR                                       = "desc";
+    public static final String TYPE_ATTR                                           = "type";
+    public static final String REFERENCE                                           = "reference";
+    public static final String REFERENCE_DOI_ATTR                                  = "doi";
+    public static final String REFERENCE_DESC                                      = "desc";
+    public static final String PROPERTY                                            = "property";
+    public static final String PROPERTY_REF                                        = "ref";
+    public static final String PROPERTY_UNIT                                       = "unit";
+    public static final String PROPERTY_DATATYPE                                   = "datatype";
+    public static final String PROPERTY_APPLIES_TO                                 = "applies_to";
+    public static final String ID_REF                                              = "id_ref";
+    public static final String ANNOTATION_SOURCE_ATTR                              = "source";
+    public static final String DISTRIBUTION_DESC                                   = "desc";
+    public static final String POINT                                               = "point";
+    public static final String POINT_LONGITUDE                                     = "long";
+    public static final String POINT_LATITUDE                                      = "lat";
+    public static final String POINT_ALTITUDE                                      = "alt";
+    public static final String POINT_ALTITUDE_UNIT_ATTR                            = "alt_unit";
+    public static final String POINT_GEODETIC_DATUM                                = "geodetic_datum";
+    public static final String CLADE_DATE                                          = "date";
+    public static final String CLADE_DATE_UNIT                                     = "unit";
+    public static final String CLADE_DATE_DESC                                     = "desc";
+    public static final String CLADE_DATE_MIN                                      = "minimum";
+    public static final String CLADE_DATE_MAX                                      = "maximum";
+    public static final String CLADE_DATE_VALUE                                    = "value";
+    public final static String SEQUENCE_RELATION                                   = "sequence_relation";
+    public final static String SEQUENCE_RELATION_TYPE                              = "type";
+    public final static String SEQUENCE_RELATION_ID_REF0                           = "id_ref_0";
+    public final static String SEQUENCE_RELATION_ID_REF1                           = "id_ref_1";
+    public final static String SEQUENCE_RELATION_DISTANCE                          = "distance";
+    public final static String SEQUENCE_SOURCE_ID                                  = "id_source";
+    public final static String POLYGON                                             = "polygon";
+
+    private PhyloXmlMapping() {
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlParser.java
new file mode 100644 (file)
index 0000000..e5fa2d0
--- /dev/null
@@ -0,0 +1,313 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipInputStream;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class PhyloXmlParser implements PhylogenyParser {
+
+    final public static String   JAXP_SCHEMA_LANGUAGE                       = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
+    final public static String   W3C_XML_SCHEMA                             = "http://www.w3.org/2001/XMLSchema";
+    final public static String   JAXP_SCHEMA_SOURCE                         = "http://java.sun.com/xml/jaxp/properties/schemaSource";
+    final public static String   SAX_FEATURES_VALIDATION                    = "http://xml.org/sax/features/validation";
+    final public static String   APACHE_FEATURES_VALIDATION_SCHEMA          = "http://apache.org/xml/features/validation/schema";
+    final public static String   APACHE_FEATURES_VALIDATION_SCHEMA_FULL     = "http://apache.org/xml/features/validation/schema-full-checking";
+    final public static String   APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation";
+    final static private boolean TIME                                       = false;
+    private Object               _source;
+    private boolean              _valid;
+    private boolean              _zipped_inputstream;
+    private int                  _error_count;
+    private int                  _warning_count;
+    private String               _schema_location;
+    private StringBuffer         _error_messages;
+    private StringBuffer         _warning_messages;
+
+    public PhyloXmlParser() {
+        init();
+        reset();
+    }
+
+    public int getErrorCount() {
+        return _error_count;
+    }
+
+    public StringBuffer getErrorMessages() {
+        return _error_messages;
+    }
+
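+    // Returns a reader for the first non-empty, non-directory entry of the zip file named by the
+    // source, or null if no such entry exists.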
+    private Reader getReaderFromZipFile() throws IOException {
+        Reader reader = null;
+        final ZipFile zip_file = new ZipFile( getSource().toString() );
+        final Enumeration<?> zip_file_entries = zip_file.entries();
+        while ( zip_file_entries.hasMoreElements() ) {
+            final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement();
+            if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) {
+                final InputStream is = zip_file.getInputStream( zip_file_entry );
+                reader = new InputStreamReader( is );
+                break;
+            }
+        }
+        return reader;
+    }
+
+    private String getSchemaLocation() {
+        return _schema_location;
+    }
+
+    private Object getSource() {
+        return _source;
+    }
+
+    public int getWarningCount() {
+        return _warning_count;
+    }
+
+    public StringBuffer getWarningMessages() {
+        return _warning_messages;
+    }
+
+    private void init() {
+        setZippedInputstream( false );
+    }
+
+    public boolean isValid() {
+        return _valid;
+    }
+
+    private boolean isZippedInputstream() {
+        return _zipped_inputstream;
+    }
+
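+    // Configures an (optionally XSD-validating) SAX parser and parses the source, which may be a
+    // File (plain or .zip), an InputSource, an InputStream (plain or zipped), a file name given as
+    // a String, or a StringBuffer holding phyloXML data. Returns all phylogenies found.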
+    public Phylogeny[] parse() throws IOException, PhylogenyParserException {
+        reset();
+        final PhyloXmlHandler handler = new PhyloXmlHandler();
+        final SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware( true );
+        try {
+            if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
+                factory.setFeature( SAX_FEATURES_VALIDATION, true );
+                factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true );
+                factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true );
+            }
+        }
+        catch ( final SAXNotRecognizedException e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "sax not recognized exception: " + e.getLocalizedMessage() );
+        }
+        catch ( final SAXNotSupportedException e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "sax not supported exception: " + e.getLocalizedMessage() );
+        }
+        catch ( final ParserConfigurationException e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "parser configuration exception: " + e.getLocalizedMessage() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "error while configuring sax parser: " + e.getLocalizedMessage() );
+        }
+        try {
+            final SAXParser parser = factory.newSAXParser();
+            if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
+                parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA );
+                parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() );
+                parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() );
+            }
+            final XMLReader xml_reader = parser.getXMLReader();
+            xml_reader.setContentHandler( handler );
+            xml_reader.setErrorHandler( new PhyloXmlParserErrorHandler() );
+            long start_time = 0;
+            if ( TIME ) {
+                start_time = new Date().getTime();
+            }
+            if ( getSource() instanceof File ) {
+                if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) {
+                    xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) );
+                }
+                else {
+                    final Reader reader = getReaderFromZipFile();
+                    if ( reader == null ) {
+                        throw new PhylogenyParserException( "zip file \"" + getSource()
+                                + "\" appears not to contain any entries" );
+                    }
+                    xml_reader.parse( new InputSource( reader ) );
+                }
+            }
+            else if ( getSource() instanceof InputSource ) {
+                xml_reader.parse( ( InputSource ) getSource() );
+            }
+            else if ( getSource() instanceof InputStream ) {
+                if ( !isZippedInputstream() ) {
+                    final InputStream is = ( InputStream ) getSource();
+                    final Reader reader = new InputStreamReader( is );
+                    xml_reader.parse( new InputSource( reader ) );
+                }
+                else {
+                    final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() );
+                    // getNextEntry() returns null if the stream contains no entries
+                    // (an InputStreamReader itself can never be null)
+                    if ( zip_is.getNextEntry() == null ) {
+                        throw new PhylogenyParserException( "zip input stream \"" + getSource()
+                                + "\" appears not to contain any (phyloXML) data" );
+                    }
+                    final Reader reader = new InputStreamReader( zip_is );
+                    xml_reader.parse( new InputSource( reader ) );
+                }
+            }
+            else if ( getSource() instanceof String ) {
+                final File file = new File( getSource().toString() );
+                final Reader reader = new FileReader( file );
+                xml_reader.parse( new InputSource( reader ) );
+            }
+            else if ( getSource() instanceof StringBuffer ) {
+                final StringReader string_reader = new StringReader( getSource().toString() );
+                xml_reader.parse( new InputSource( string_reader ) );
+            }
+            else {
+                throw new PhylogenyParserException( "phyloXML parser: attempt to parse object of unsupported type: \""
+                        + getSource().getClass() + "\"" );
+            }
+            if ( TIME ) {
+                System.out.println( "[TIME] phyloXML parsing: " + ( new Date().getTime() - start_time ) + "ms." );
+            }
+        }
+        catch ( final SAXException sax_exception ) {
+            throw new PhylogenyParserException( "failed to parse [" + getSource() + "]: "
+                    + sax_exception.getLocalizedMessage() );
+        }
+        catch ( final ParserConfigurationException parser_config_exception ) {
+            throw new PhylogenyParserException( "failed to parse [" + getSource()
+                    + "]. Problem with XML parser configuration: " + parser_config_exception.getLocalizedMessage() );
+        }
+        catch ( final IOException e ) {
+            throw new PhylogenyParserException( "problem with input source: " + e.getLocalizedMessage() );
+        }
+        catch ( final Exception e ) {
+            throw new PhylogenyParserException( e.getLocalizedMessage() );
+        }
+        catch ( final Error err ) {
+            err.printStackTrace();
+            throw new PhylogenyParserException( "severe error: " + err.getLocalizedMessage() );
+        }
+        final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ];
+        int i = 0;
+        for( final Phylogeny phylogeny : handler.getPhylogenies() ) {
+            ps[ i++ ] = phylogeny;
+        }
+        return ps;
+    }
+
+    private void reset() {
+        _valid = true;
+        _error_count = 0;
+        _warning_count = 0;
+        _error_messages = new StringBuffer();
+        _warning_messages = new StringBuffer();
+    }
+
+    public void setSource( final Object source ) {
+        _source = source;
+    }
+
+    public void setValidateAgainstSchema( final String schema_location ) {
+        _schema_location = schema_location;
+    }
+
+    public void setZippedInputstream( final boolean zipped_inputstream ) {
+        _zipped_inputstream = zipped_inputstream;
+    }
+
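+    // Factory method that enables validation against the phyloXML XSD bundled with the jar.
+    // Typical usage (a sketch only; the file name is an example):
+    //   final PhyloXmlParser parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
+    //   parser.setSource( new File( "my_trees.xml" ) );
+    //   final Phylogeny[] phylogenies = parser.parse();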
+    public static PhyloXmlParser createPhyloXmlParserXsdValidating() {
+        final PhyloXmlParser xml_parser = new PhyloXmlParser();
+        final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+        final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+        if ( xsd_url != null ) {
+            xml_parser.setValidateAgainstSchema( xsd_url.toString() );
+        }
+        else {
+            throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
+                    + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
+        }
+        return xml_parser;
+    }
+
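+    // SAX error handler: errors and fatal errors abort parsing by throwing PhyloXmlException,
+    // warnings are only counted and collected.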
+    private class PhyloXmlParserErrorHandler extends DefaultHandler {
+
+        @Override
+        public void error( final SAXParseException e ) {
+            ++_error_count;
+            _valid = false;
+            throw new PhyloXmlException( "phyloXML error at line " + e.getLineNumber() + ": \n"
+                    + e.getLocalizedMessage() );
+        }
+
+        @Override
+        public void fatalError( final SAXParseException e ) {
+            ++_error_count;
+            _valid = false;
+            throw new PhyloXmlException( "fatal XML error at line " + e.getLineNumber() + ": \n"
+                    + e.getLocalizedMessage() );
+        }
+
+        @Override
+        public void warning( final SAXParseException e ) {
+            ++_warning_count;
+            // warnings accumulate in _warning_messages (not in _error_messages)
+            if ( _warning_messages.length() > 0 ) {
+                _warning_messages.append( ForesterUtil.LINE_SEPARATOR );
+            }
+            _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() );
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java
new file mode 100644 (file)
index 0000000..277dfa1
--- /dev/null
@@ -0,0 +1,98 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+public final class PhyloXmlUtil {
+
+    public final static Pattern     SEQUENCE_SYMBOL_PATTERN                    = Pattern.compile( "\\S{1,20}" );
+    public final static Pattern     TAXOMONY_CODE_PATTERN                      = Pattern.compile( "[a-zA-Z0-9_]{1,10}" );
+    public final static Pattern     LIT_REF_DOI_PATTERN                        = Pattern
+                                                                                       .compile( "[a-zA-Z0-9_\\.]+\\S+" );
+    public final static Set<String> SEQUENCE_TYPES                             = new HashSet<String>();
+    public final static Set<String> TAXONOMY_RANKS                             = new HashSet<String>();
+    public static final int         ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT = 9;
+    public static final String      VECTOR_PROPERTY_REF                        = "vector:index=";
+    public static final String      VECTOR_PROPERTY_TYPE                       = "xsd:decimal";
+    static {
+        SEQUENCE_TYPES.add( "rna" );
+        SEQUENCE_TYPES.add( "protein" );
+        SEQUENCE_TYPES.add( "dna" );
+        TAXONOMY_RANKS.add( "domain" );
+        TAXONOMY_RANKS.add( "superkingdom" );
+        TAXONOMY_RANKS.add( "kingdom" );
+        TAXONOMY_RANKS.add( "subkingdom" );
+        TAXONOMY_RANKS.add( "branch" );
+        TAXONOMY_RANKS.add( "infrakingdom" );
+        TAXONOMY_RANKS.add( "superphylum" );
+        TAXONOMY_RANKS.add( "phylum" );
+        TAXONOMY_RANKS.add( "subphylum" );
+        TAXONOMY_RANKS.add( "infraphylum" );
+        TAXONOMY_RANKS.add( "microphylum" );
+        TAXONOMY_RANKS.add( "superdivision" );
+        TAXONOMY_RANKS.add( "division" );
+        TAXONOMY_RANKS.add( "subdivision" );
+        TAXONOMY_RANKS.add( "infradivision" );
+        TAXONOMY_RANKS.add( "superclass" );
+        TAXONOMY_RANKS.add( "class" );
+        TAXONOMY_RANKS.add( "subclass" );
+        TAXONOMY_RANKS.add( "infraclass" );
+        TAXONOMY_RANKS.add( "superlegion" );
+        TAXONOMY_RANKS.add( "legion" );
+        TAXONOMY_RANKS.add( "sublegion" );
+        TAXONOMY_RANKS.add( "infralegion" );
+        TAXONOMY_RANKS.add( "supercohort" );
+        TAXONOMY_RANKS.add( "cohort" );
+        TAXONOMY_RANKS.add( "subcohort" );
+        TAXONOMY_RANKS.add( "infracohort" );
+        TAXONOMY_RANKS.add( "superorder" );
+        TAXONOMY_RANKS.add( "order" );
+        TAXONOMY_RANKS.add( "suborder" );
+        TAXONOMY_RANKS.add( "superfamily" );
+        TAXONOMY_RANKS.add( "family" );
+        TAXONOMY_RANKS.add( "subfamily" );
+        TAXONOMY_RANKS.add( "supertribe" );
+        TAXONOMY_RANKS.add( "tribe" );
+        TAXONOMY_RANKS.add( "subtribe" );
+        TAXONOMY_RANKS.add( "infratribe" );
+        TAXONOMY_RANKS.add( "genus" );
+        TAXONOMY_RANKS.add( "subgenus" );
+        TAXONOMY_RANKS.add( "superspecies" );
+        TAXONOMY_RANKS.add( "species" );
+        TAXONOMY_RANKS.add( "subspecies" );
+        TAXONOMY_RANKS.add( "variety" );
+        TAXONOMY_RANKS.add( "subvariety" );
+        TAXONOMY_RANKS.add( "form" );
+        TAXONOMY_RANKS.add( "subform" );
+        TAXONOMY_RANKS.add( "cultivar" );
+        TAXONOMY_RANKS.add( "strain" );
+        TAXONOMY_RANKS.add( "unknown" );
+        TAXONOMY_RANKS.add( "other" );
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/XmlElement.java b/forester/java/src/org/forester/io/parsers/phyloxml/XmlElement.java
new file mode 100644 (file)
index 0000000..442c937
--- /dev/null
@@ -0,0 +1,213 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.util.ForesterUtil;
+import org.xml.sax.Attributes;
+
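+// A minimal, DOM-like element built by the SAX handler: stores the element's names, attributes,
+// text value, child elements, and a link to its parent element.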
+public class XmlElement {
+
+    public final static boolean           DEBUG = false;
+    private final String                  _namespaceUri;
+    private final String                  _localName;
+    private final String                  _qualifiedName;
+    private String                        _value;
+    private final HashMap<String, String> _attributes;
+    private final ArrayList<XmlElement>   _childElements;
+    private XmlElement                    _parent;
+
+    public XmlElement( final String namespaceUri,
+                       final String localName,
+                       final String qualifiedName,
+                       final Attributes attributes ) {
+        _namespaceUri = namespaceUri;
+        _localName = localName;
+        _qualifiedName = qualifiedName;
+        if ( attributes != null ) {
+            _attributes = new HashMap<String, String>( attributes.getLength() );
+            for( int i = 0; i < attributes.getLength(); ++i ) {
+                getAttributes().put( new String( attributes.getQName( i ) ), new String( attributes.getValue( i ) ) );
+            }
+        }
+        else {
+            _attributes = new HashMap<String, String>();
+        }
+        _childElements = new ArrayList<XmlElement>();
+        _parent = null;
+    }
+
+    public void addChildElement( final XmlElement element ) {
+        element.setParent( this );
+        getChildElements().add( element );
+    }
+
+    public void appendValue( final String value ) {
+        // guard against the initial null value; otherwise the literal "null" would be prepended
+        _value = ( _value == null ) ? value : _value + value;
+    }
+
+    public String getAttribute( final String attribute_name ) {
+        if ( !isHasAttribute( attribute_name ) ) {
+            throw new IllegalArgumentException( "no attribute named [" + attribute_name + "] present in element ["
+                    + getQualifiedName() + "]" );
+        }
+        return getAttributes().get( attribute_name );
+    }
+
+    public HashMap<String, String> getAttributes() {
+        return _attributes;
+    }
+
+    public XmlElement getChildElement( final int i ) {
+        if ( ( i < 0 ) || ( i >= getNumberOfChildElements() ) ) {
+            throw new IllegalArgumentException( "attempt to get child element with index " + i + " for element with "
+                    + getNumberOfChildElements() + " child elements" );
+        }
+        return getChildElements().get( i );
+    }
+
+    ArrayList<XmlElement> getChildElements() {
+        return _childElements;
+    }
+
+    String getLocalName() {
+        return _localName;
+    }
+
+    String getNamespaceUri() {
+        return _namespaceUri;
+    }
+
+    public int getNumberOfChildElements() {
+        return getChildElements().size();
+    }
+
+    public XmlElement getParent() {
+        return _parent;
+    }
+
+    public String getQualifiedName() {
+        return _qualifiedName;
+    }
+
+    XmlElement getRoot() {
+        XmlElement e = this;
+        while ( e.getParent() != null ) {
+            e = e.getParent();
+        }
+        return e;
+    }
+
+    public boolean getValueAsBoolean() throws PhylogenyParserException {
+        // note: the former "new Boolean(...)" call never throws NumberFormatException, so malformed
+        // values were silently mapped to false; validate explicitly instead (xsd:boolean allows 1/0).
+        final String value = getValueAsString();
+        if ( value.equalsIgnoreCase( "true" ) || value.equals( "1" ) ) {
+            return true;
+        }
+        if ( value.equalsIgnoreCase( "false" ) || value.equals( "0" ) ) {
+            return false;
+        }
+        throw new PhylogenyParserException( "attempt to parse [" + value + "] into boolean, in " + toString() );
+    }
+
+    public double getValueAsDouble() throws PhylogenyParserException {
+        double d = 0.0;
+        try {
+            d = Double.parseDouble( getValueAsString() );
+        }
+        catch ( final NumberFormatException ex ) {
+            throw new PhylogenyParserException( "attempt to parse [" + getValueAsString() + "] into double, in "
+                    + toString() );
+        }
+        return d;
+    }
+
+    public int getValueAsInt() throws PhylogenyParserException {
+        int i = 0;
+        try {
+            i = Integer.parseInt( getValueAsString() );
+        }
+        catch ( final NumberFormatException ex ) {
+            throw new PhylogenyParserException( "attempt to parse [" + getValueAsString() + "] into integer, in "
+                    + toString() );
+        }
+        return i;
+    }
+
+    public String getValueAsString() {
+        if ( _value == null ) {
+            return "";
+        }
+        return _value.replaceAll( "\\s+", " " ).trim();
+    }
+
+    public boolean isHasAttribute( final String attribute_name ) {
+        return getAttributes().containsKey( attribute_name );
+    }
+
+    public boolean isHasValue() {
+        return !ForesterUtil.isEmpty( _value );
+    }
+
+    void setParent( final XmlElement parent ) {
+        _parent = parent;
+    }
+
+    /**
+     * [Careful, this does not call "new String(...)"]
+     * 
+     * @param value
+     */
+    public void setValue( final String value ) {
+        _value = value;
+        if ( XmlElement.DEBUG ) {
+            System.out.println();
+            System.out.println( "Value is \"" + value + "\" for" );
+            System.out.println( "Local name     = " + getLocalName() );
+            System.out.println( "Qualified name = " + getQualifiedName() );
+            System.out.println( "Namespace URI  = " + getNamespaceUri() );
+            System.out.print( "Attributes     : " );
+            for( final String string : getAttributes().keySet() ) {
+                final String key = string;
+                System.out.print( key + " = \"" + getAttributes().get( key ) + "\"  " );
+            }
+            System.out.println();
+            System.out.println();
+        }
+    }
+
+    @Override
+    public String toString() {
+        if ( getParent() != null ) {
+            return "\"" + getQualifiedName() + "\" [value: " + getValueAsString() + ", parent element: \""
+                    + getParent().getQualifiedName() + "\"]";
+        }
+        return "\"" + getQualifiedName() + "\" [value: " + getValueAsString() + "]";
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java
new file mode 100644 (file)
index 0000000..01d3c54
--- /dev/null
@@ -0,0 +1,63 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.PhylogenyData;
+
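+// Singleton parser for phyloXML accession elements; obtain it via getInstance().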
+public class AccessionParser implements PhylogenyDataPhyloXmlParser {
+
+    private final static PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new AccessionParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private AccessionParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ) {
+            return new Accession( element.getValueAsString(), element
+                    .getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) );
+        }
+        else {
+            return new Accession( element.getValueAsString(), "?" );
+        }
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/AnnotationParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/AnnotationParser.java
new file mode 100644 (file)
index 0000000..8f25921
--- /dev/null
@@ -0,0 +1,97 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Annotation;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.PropertiesMap;
+import org.forester.phylogeny.data.Property;
+import org.forester.phylogeny.data.Uri;
+
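+// Singleton parser for phyloXML annotation elements, including nested desc, confidence, uri, and
+// property children; obtain it via getInstance().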
+public class AnnotationParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new AnnotationParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private AnnotationParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String ref;
+        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_REF_ATTR ) ) {
+            ref = element.getAttribute( PhyloXmlMapping.ANNOTATION_REF_ATTR );
+        }
+        else {
+            ref = "_:_";
+        }
+        final Annotation annotation = new Annotation( ref );
+        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_TYPE_ATTR ) ) {
+            annotation.setType( element.getAttribute( PhyloXmlMapping.ANNOTATION_TYPE_ATTR ) );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR ) ) {
+            annotation.setEvidence( element.getAttribute( PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR ) );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.ANNOTATION_SOURCE_ATTR ) ) {
+            annotation.setSource( element.getAttribute( PhyloXmlMapping.ANNOTATION_SOURCE_ATTR ) );
+        }
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.ANNOTATION_DESC ) ) {
+                annotation.setDesc( child_element.getValueAsString() );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.CONFIDENCE ) ) {
+                annotation.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) {
+                annotation.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.PROPERTY ) ) {
+                if ( annotation.getProperties() == null ) {
+                    annotation.setProperties( new PropertiesMap() );
+                }
+                annotation.getProperties()
+                        .addProperty( ( Property ) PropertyParser.getInstance().parse( child_element ) );
+            }
+        }
+        return annotation;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/BinaryCharactersParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/BinaryCharactersParser.java
new file mode 100644 (file)
index 0000000..11469c4
--- /dev/null
@@ -0,0 +1,117 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.BinaryCharacters;
+import org.forester.phylogeny.data.PhylogenyData;
+
+public class BinaryCharactersParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final BinaryCharactersParser _instance;
+    static {
+        try {
+            _instance = new BinaryCharactersParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private BinaryCharactersParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        final SortedSet<String> present = new TreeSet<String>();
+        final SortedSet<String> gained = new TreeSet<String>();
+        final SortedSet<String> lost = new TreeSet<String>();
+        String type = "";
+        int present_count = BinaryCharacters.COUNT_DEFAULT;
+        int gained_count = BinaryCharacters.COUNT_DEFAULT;
+        int lost_count = BinaryCharacters.COUNT_DEFAULT;
+        if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_TYPE_ATTR ) ) {
+            type = element.getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_TYPE_ATTR );
+        }
+        try {
+            if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_PRESENT_COUNT_ATTR ) ) {
+                present_count = Integer.parseInt( element
+                        .getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_PRESENT_COUNT_ATTR ) );
+            }
+            if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_GAINED_COUNT_ATTR ) ) {
+                gained_count = Integer.parseInt( element
+                        .getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_GAINED_COUNT_ATTR ) );
+            }
+            if ( element.isHasAttribute( PhyloXmlMapping.BINARY_CHARACTERS_LOST_COUNT_ATTR ) ) {
+                lost_count = Integer
+                        .parseInt( element.getAttribute( PhyloXmlMapping.BINARY_CHARACTERS_LOST_COUNT_ATTR ) );
+            }
+        }
+        catch ( final NumberFormatException e ) {
+            throw new PhylogenyParserException( "failed to parse integer from element " + element.getQualifiedName() );
+        }
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.BINARY_CHARACTERS_PRESENT ) ) {
+                parseCharacters( present, child_element );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.BINARY_CHARACTERS_GAINED ) ) {
+                parseCharacters( gained, child_element );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.BINARY_CHARACTERS_LOST ) ) {
+                parseCharacters( lost, child_element );
+            }
+        }
+        BinaryCharacters bc = null;
+        if ( present_count != BinaryCharacters.COUNT_DEFAULT ) {
+            bc = new BinaryCharacters( present, gained, lost, type, present_count, gained_count, lost_count );
+        }
+        else {
+            bc = new BinaryCharacters( present, gained, lost, type );
+        }
+        return bc;
+    }
+
+    // Adds every binary-character value found under the given element to the supplied
+    // set; used for the present, gained, and lost character sets alike.
+    private void parseCharacters( final SortedSet<String> characters, final XmlElement child_element ) {
+        for( int j = 0; j < child_element.getNumberOfChildElements(); ++j ) {
+            final XmlElement child_child_element = child_element.getChildElement( j );
+            if ( child_child_element.getQualifiedName().equals( PhyloXmlMapping.BINARY_CHARACTER ) ) {
+                characters.add( child_child_element.getValueAsString() );
+            }
+        }
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/BranchWidthParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/BranchWidthParser.java
new file mode 100644 (file)
index 0000000..b65513b
--- /dev/null
@@ -0,0 +1,56 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.BranchWidth;
+import org.forester.phylogeny.data.PhylogenyData;
+
+public class BranchWidthParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final BranchWidthParser _instance;
+    static {
+        try {
+            _instance = new BranchWidthParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private BranchWidthParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        return new BranchWidth( element.getValueAsDouble() );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/ColorParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/ColorParser.java
new file mode 100644 (file)
index 0000000..02255ef
--- /dev/null
@@ -0,0 +1,77 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.awt.Color;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.BranchColor;
+import org.forester.phylogeny.data.PhylogenyData;
+
+public class ColorParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new ColorParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private ColorParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
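+        // Missing color-channel child elements leave their components at the default of 0
+        // (so an empty color element yields black).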
+        int red = 0;
+        int green = 0;
+        int blue = 0;
+        for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
+            final XmlElement c = element.getChildElement( j );
+            if ( c.getQualifiedName().equals( PhyloXmlMapping.COLOR_RED ) ) {
+                red = c.getValueAsInt();
+            }
+            else if ( c.getQualifiedName().equals( PhyloXmlMapping.COLOR_GREEN ) ) {
+                green = c.getValueAsInt();
+            }
+            else if ( c.getQualifiedName().equals( PhyloXmlMapping.COLOR_BLUE ) ) {
+                blue = c.getValueAsInt();
+            }
+        }
+        final BranchColor color = new BranchColor();
+        color.setValue( new Color( red, green, blue ) );
+        return color;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/ConfidenceParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/ConfidenceParser.java
new file mode 100644 (file)
index 0000000..92ce6c7
--- /dev/null
@@ -0,0 +1,62 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.PhylogenyData;
+
+public class ConfidenceParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new ConfidenceParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private ConfidenceParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        final Confidence confidence = new Confidence();
+        confidence.setValue( element.getValueAsDouble() );
+        if ( element.isHasAttribute( PhyloXmlMapping.CONFIDENCE_TYPE_ATTR ) ) {
+            confidence.setType( element.getAttribute( PhyloXmlMapping.CONFIDENCE_TYPE_ATTR ) );
+        }
+        return confidence;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/DateParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/DateParser.java
new file mode 100644 (file)
index 0000000..3f5add7
--- /dev/null
@@ -0,0 +1,95 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.math.BigDecimal;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Date;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.util.ForesterUtil;
+
+public class DateParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new DateParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private DateParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String unit = "";
+        if ( element.isHasAttribute( PhyloXmlMapping.CLADE_DATE_UNIT ) ) {
+            unit = element.getAttribute( PhyloXmlMapping.CLADE_DATE_UNIT );
+        }
+        String val = null;
+        String min = null;
+        String max = null;
+        String desc = "";
+        for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
+            final XmlElement e = element.getChildElement( j );
+            if ( e.getQualifiedName().equals( PhyloXmlMapping.CLADE_DATE_VALUE ) ) {
+                val = e.getValueAsString();
+            }
+            else if ( e.getQualifiedName().equals( PhyloXmlMapping.CLADE_DATE_MIN ) ) {
+                min = e.getValueAsString();
+            }
+            else if ( e.getQualifiedName().equals( PhyloXmlMapping.CLADE_DATE_MAX ) ) {
+                max = e.getValueAsString();
+            }
+            else if ( e.getQualifiedName().equals( PhyloXmlMapping.CLADE_DATE_DESC ) ) {
+                desc = e.getValueAsString();
+            }
+        }
+        BigDecimal val_bd = null;
+        BigDecimal min_bd = null;
+        BigDecimal max_bd = null;
+        if ( !ForesterUtil.isEmpty( val ) ) {
+            val_bd = new BigDecimal( val );
+        }
+        if ( !ForesterUtil.isEmpty( min ) ) {
+            min_bd = new BigDecimal( min );
+        }
+        if ( !ForesterUtil.isEmpty( max ) ) {
+            max_bd = new BigDecimal( max );
+        }
+        return new Date( desc, val_bd, min_bd, max_bd, unit );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/DistributionParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/DistributionParser.java
new file mode 100644 (file)
index 0000000..c211106
--- /dev/null
@@ -0,0 +1,83 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Distribution;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Point;
+import org.forester.phylogeny.data.Polygon;
+
+public class DistributionParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new DistributionParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private DistributionParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String desc = "";
+        List<Point> points = null;
+        List<Polygon> polygons = null;
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.DISTRIBUTION_DESC ) ) {
+                desc = child_element.getValueAsString();
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.POINT ) ) {
+                if ( points == null ) {
+                    points = new ArrayList<Point>();
+                }
+                points.add( ( Point ) PointParser.getInstance().parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.POLYGON ) ) {
+                if ( polygons == null ) {
+                    polygons = new ArrayList<Polygon>();
+                }
+                polygons.add( ( Polygon ) PolygonParser.getInstance().parse( child_element ) );
+            }
+        }
+        return new Distribution( desc, points, polygons );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/DomainArchitectureParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/DomainArchitectureParser.java
new file mode 100644 (file)
index 0000000..34bba14
--- /dev/null
@@ -0,0 +1,76 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.DomainArchitecture;
+import org.forester.phylogeny.data.ProteinDomain;
+
+public class DomainArchitectureParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new DomainArchitectureParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private DomainArchitectureParser() {
+    }
+
+    @Override
+    public DomainArchitecture parse( final XmlElement element ) throws PhylogenyParserException {
+        final DomainArchitecture architecture = new DomainArchitecture();
+        if ( !element.isHasAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH ) ) {
+            throw new PhylogenyParserException( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH
+                    + " attribute is required for domain architecture" );
+        }
+        final String length_str = element.getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH );
+        try {
+            architecture.setTotalLength( Integer.parseInt( length_str ) );
+        }
+        catch ( final NumberFormatException e ) {
+            throw new PhylogenyParserException( "could not extract domain architecture length from [" + length_str
+                    + "]: " + e.getMessage() );
+        }
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN ) ) {
+                architecture.addDomain( ( ProteinDomain ) ProteinDomainParser.getInstance().parse( child_element ) );
+            }
+        }
+        return architecture;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/EventParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/EventParser.java
new file mode 100644 (file)
index 0000000..f58448e
--- /dev/null
@@ -0,0 +1,97 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.util.ForesterUtil;
+
+public class EventParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new EventParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private EventParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String type = "";
+        Confidence conf = null;
+        int duplications = Event.DEFAULT_VALUE;
+        int speciations = Event.DEFAULT_VALUE;
+        int losses = Event.DEFAULT_VALUE;
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_TYPE ) ) {
+                type = child_element.getValueAsString();
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.CONFIDENCE ) ) {
+                conf = ( ( Confidence ) ConfidenceParser.getInstance().parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_DUPLICATIONS ) ) {
+                duplications = child_element.getValueAsInt();
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_SPECIATIONS ) ) {
+                speciations = child_element.getValueAsInt();
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.EVENT_LOSSES ) ) {
+                losses = child_element.getValueAsInt();
+            }
+        }
+        Event event = null;
+        if ( ForesterUtil.isEmpty( type ) ) {
+            event = new Event( duplications, speciations, losses );
+        }
+        else {
+            try {
+                event = new Event( duplications, speciations, losses, type );
+            }
+            catch ( final Exception e ) {
+                throw new PhylogenyParserException( "problem with " + element.toString() + ": " + e.getMessage() );
+            }
+        }
+        if ( conf != null ) {
+            event.setConfidence( conf );
+        }
+        return event;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/IdentifierParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/IdentifierParser.java
new file mode 100644 (file)
index 0000000..6d68234
--- /dev/null
@@ -0,0 +1,67 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.PhylogenyData;
+
+public class IdentifierParser implements PhylogenyDataPhyloXmlParser {
+
+    final private static String                      TYPE = "type"; // TODO deprecated; kept only for compatibility with phyloXML 1.00, remove eventually
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new IdentifierParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private IdentifierParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        if ( element.isHasAttribute( PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR ) ) {
+            return new Identifier( element.getValueAsString(), element
+                    .getAttribute( PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR ) );
+        }
+        else if ( element.isHasAttribute( TYPE ) ) {
+            return new Identifier( element.getValueAsString(), element.getAttribute( TYPE ) );
+        }
+        else {
+            return new Identifier( element.getValueAsString() );
+        }
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/PhylogenyDataPhyloXmlParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/PhylogenyDataPhyloXmlParser.java
new file mode 100644 (file)
index 0000000..d8db199
--- /dev/null
@@ -0,0 +1,36 @@
+// $Id:
+// $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.PhylogenyData;
+
+public interface PhylogenyDataPhyloXmlParser {
+
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException;
+}
\ No newline at end of file
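This single-method interface is what allows the handler to treat every concrete parser above uniformly. A minimal dispatch sketch under stated assumptions: the registry map and the element variable are hypothetical, the PhyloXmlMapping constants are assumed to be plain String values, and imports of java.util.HashMap and java.util.Map are assumed; only getInstance(), getQualifiedName(), and parse() come from this commit.

    // Hypothetical registry keyed by qualified element name; not part of forester.
    final Map<String, PhylogenyDataPhyloXmlParser> parsers =
            new HashMap<String, PhylogenyDataPhyloXmlParser>();
    parsers.put( PhyloXmlMapping.CONFIDENCE, ConfidenceParser.getInstance() );
    parsers.put( PhyloXmlMapping.POINT, PointParser.getInstance() );
    // `element` is assumed to be an XmlElement whose qualified name is registered above.
    final PhylogenyData data = parsers.get( element.getQualifiedName() ).parse( element );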
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/PointParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/PointParser.java
new file mode 100644 (file)
index 0000000..8ac0ee5
--- /dev/null
@@ -0,0 +1,95 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.math.BigDecimal;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Point;
+import org.forester.util.ForesterUtil;
+
+public class PointParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new PointParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private PointParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String alt_unit = "";
+        String geo_datum = "";
+        if ( element.isHasAttribute( PhyloXmlMapping.POINT_ALTITUDE_UNIT_ATTR ) ) {
+            alt_unit = element.getAttribute( PhyloXmlMapping.POINT_ALTITUDE_UNIT_ATTR );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.POINT_GEODETIC_DATUM ) ) {
+            geo_datum = element.getAttribute( PhyloXmlMapping.POINT_GEODETIC_DATUM );
+        }
+        String lat_str = null;
+        String lon_str = null;
+        String alt_str = null;
+        for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
+            final XmlElement e = element.getChildElement( j );
+            if ( e.getQualifiedName().equals( PhyloXmlMapping.POINT_LATITUDE ) ) {
+                lat_str = e.getValueAsString();
+            }
+            else if ( e.getQualifiedName().equals( PhyloXmlMapping.POINT_LONGITUDE ) ) {
+                lon_str = e.getValueAsString();
+            }
+            else if ( e.getQualifiedName().equals( PhyloXmlMapping.POINT_ALTITUDE ) ) {
+                alt_str = e.getValueAsString();
+            }
+        }
+        BigDecimal lat = null;
+        BigDecimal lon = null;
+        BigDecimal alt = null;
+        if ( !ForesterUtil.isEmpty( lat_str ) ) {
+            lat = new BigDecimal( lat_str );
+        }
+        if ( !ForesterUtil.isEmpty( lon_str ) ) {
+            lon = new BigDecimal( lon_str );
+        }
+        if ( !ForesterUtil.isEmpty( alt_str ) ) {
+            alt = new BigDecimal( alt_str );
+        }
+        return new Point( geo_datum, lat, lon, alt, alt_unit );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/PolygonParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/PolygonParser.java
new file mode 100644 (file)
index 0000000..6d5ff38
--- /dev/null
@@ -0,0 +1,68 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Point;
+import org.forester.phylogeny.data.Polygon;
+
+public class PolygonParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new PolygonParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private PolygonParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        final List<Point> points = new ArrayList<Point>();
+        for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
+            final XmlElement e = element.getChildElement( j );
+            if ( e.getQualifiedName().equals( PhyloXmlMapping.POINT ) ) {
+                points.add( ( Point ) PointParser.getInstance().parse( e ) );
+            }
+        }
+        return new Polygon( points );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/PropertyParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/PropertyParser.java
new file mode 100644 (file)
index 0000000..38f4dfb
--- /dev/null
@@ -0,0 +1,100 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Property;
+import org.forester.phylogeny.data.Property.AppliesTo;
+import org.forester.util.ForesterUtil;
+
+public class PropertyParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new PropertyParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private PropertyParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String ref = "";
+        String value = "";
+        String unit = "";
+        String datatype = "";
+        String applies_to_str = "";
+        String id_ref = "";
+        if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_REF ) ) {
+            ref = element.getAttribute( PhyloXmlMapping.PROPERTY_REF );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_UNIT ) ) {
+            unit = element.getAttribute( PhyloXmlMapping.PROPERTY_UNIT );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_DATATYPE ) ) {
+            datatype = element.getAttribute( PhyloXmlMapping.PROPERTY_DATATYPE );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.PROPERTY_APPLIES_TO ) ) {
+            applies_to_str = element.getAttribute( PhyloXmlMapping.PROPERTY_APPLIES_TO );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.ID_REF ) ) {
+            id_ref = element.getAttribute( PhyloXmlMapping.ID_REF );
+        }
+        if ( !ForesterUtil.isEmpty( element.getValueAsString() ) ) {
+            value = element.getValueAsString();
+        }
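+        // Any unrecognized or missing applies_to value falls back to AppliesTo.OTHER.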
+        AppliesTo applies_to = AppliesTo.OTHER;
+        if ( applies_to_str.equals( AppliesTo.NODE.toString() ) ) {
+            applies_to = AppliesTo.NODE;
+        }
+        else if ( applies_to_str.equals( AppliesTo.PARENT_BRANCH.toString() ) ) {
+            applies_to = AppliesTo.PARENT_BRANCH;
+        }
+        else if ( applies_to_str.equals( AppliesTo.CLADE.toString() ) ) {
+            applies_to = AppliesTo.CLADE;
+        }
+        else if ( applies_to_str.equals( AppliesTo.ANNOTATION.toString() ) ) {
+            applies_to = AppliesTo.ANNOTATION;
+        }
+        else if ( applies_to_str.equals( AppliesTo.PHYLOGENY.toString() ) ) {
+            applies_to = AppliesTo.PHYLOGENY;
+        }
+        return new Property( ref, value, unit, datatype, applies_to, id_ref );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/ProteinDomainParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/ProteinDomainParser.java
new file mode 100644 (file)
index 0000000..e32096e
--- /dev/null
@@ -0,0 +1,78 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.ProteinDomain;
+
+public class ProteinDomainParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new ProteinDomainParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private ProteinDomainParser() {
+    }
+
+    @Override
+    public ProteinDomain parse( final XmlElement element ) throws PhylogenyParserException {
+        String name = "";
+        int f = -1;
+        int t = -1;
+        double conf = ProteinDomain.CONFIDENCE_DEFAULT;
+        String id = ProteinDomain.IDENTIFIER_DEFAULT;
+        try {
+            f = Integer
+                    .parseInt( element.getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM ) );
+            t = Integer.parseInt( element.getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO ) );
+            conf = Double.parseDouble( element
+                    .getAttribute( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE ) );
+            if ( element.isHasAttribute( PhyloXmlMapping.IDENTIFIER ) ) {
+                id = element.getAttribute( PhyloXmlMapping.IDENTIFIER );
+            }
+        }
+        catch ( final Exception e ) {
+            throw new PhylogenyParserException( "failed to parse element [" + element + "]: " + e.getMessage() );
+        }
+        name = element.getValueAsString();
+        if ( ( f == -1 ) || ( t == -1 ) || ( conf == ProteinDomain.CONFIDENCE_DEFAULT ) ) {
+            throw new PhylogenyParserException( "from, to, or confidence attribute not set in: " + element );
+        }
+        return new ProteinDomain( name, f, t, id, conf );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/ReferenceParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/ReferenceParser.java
new file mode 100644 (file)
index 0000000..cad2b26
--- /dev/null
@@ -0,0 +1,75 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Reference;
+import org.forester.util.ForesterUtil;
+
+public class ReferenceParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new ReferenceParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private ReferenceParser() {
+    }
+
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String desc = "";
+        String doi = "";
+        if ( element.isHasAttribute( PhyloXmlMapping.REFERENCE_DOI_ATTR ) ) {
+            doi = element.getAttribute( PhyloXmlMapping.REFERENCE_DOI_ATTR );
+        }
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.REFERENCE_DESC ) ) {
+                desc = child_element.getValueAsString();
+                break;
+            }
+        }
+        if ( !ForesterUtil.isEmpty( doi ) ) {
+            return new Reference( desc, doi );
+        }
+        else {
+            return new Reference( desc );
+        }
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java
new file mode 100644 (file)
index 0000000..f0abd4b
--- /dev/null
@@ -0,0 +1,99 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
+import org.forester.phylogeny.data.DomainArchitecture;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.Uri;
+
+public class SequenceParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new SequenceParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private SequenceParser() {
+    }
+
+    @Override
+    public Sequence parse( final XmlElement element ) throws PhylogenyParserException {
+        final Sequence sequence = new Sequence();
+        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_TYPE ) ) {
+            sequence.setType( element.getAttribute( PhyloXmlMapping.SEQUENCE_TYPE ) );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_SOURCE_ID ) ) {
+            sequence.setSourceId( element.getAttribute( PhyloXmlMapping.SEQUENCE_SOURCE_ID ) );
+        }
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_LOCATION ) ) {
+                sequence.setLocation( child_element.getValueAsString() );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_NAME ) ) {
+                sequence.setName( child_element.getValueAsString() );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_MOL_SEQ ) ) {
+                if ( child_element.isHasAttribute( PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR ) ) {
+                    sequence.setMolecularSequenceAligned( Boolean.parseBoolean( child_element
+                            .getAttribute( PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR ) ) );
+                }
+                sequence.setMolecularSequence( child_element.getValueAsString() );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.ACCESSION ) ) {
+                sequence.setAccession( ( Accession ) AccessionParser.getInstance().parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_SYMBOL ) ) {
+                sequence.setSymbol( child_element.getValueAsString() );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.ANNOTATION ) ) {
+                sequence.addAnnotation( ( Annotation ) AnnotationParser.getInstance().parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE ) ) {
+                sequence.setDomainArchitecture( ( DomainArchitecture ) DomainArchitectureParser.getInstance()
+                        .parse( child_element ) );
+            }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) {
+                sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
+            }
+        }
+        return sequence;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceRelationParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceRelationParser.java
new file mode 100644 (file)
index 0000000..6c0a849
--- /dev/null
@@ -0,0 +1,93 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlHandler;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.SequenceRelation;
+
+public class SequenceRelationParser implements PhylogenyDataPhyloXmlParser {
+
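+    // Not a plain singleton like the other phyloXML data parsers: one instance is
+    // cached per Phylogeny, because resolving the sequence id references requires
+    // the sequence-by-id map of that particular phylogeny (see parse()).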
+    private static final Map<Phylogeny, SequenceRelationParser> _instances = new HashMap<Phylogeny, SequenceRelationParser>();
+    private Phylogeny                                           _phylogeny;
+
+    private SequenceRelationParser() {
+    }
+
+    @Override
+    public SequenceRelation parse( final XmlElement element ) throws PhylogenyParserException {
+        final SequenceRelation seqRelation = new SequenceRelation();
+        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_RELATION_TYPE ) ) {
+            final String sType = element.getAttribute( PhyloXmlMapping.SEQUENCE_RELATION_TYPE );
+            seqRelation.setType( SequenceRelation.SEQUENCE_RELATION_TYPE.valueOf( sType ) );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_RELATION_ID_REF0 ) && ( _phylogeny != null ) ) {
+            final Sequence ref = PhyloXmlHandler.getSequenceMapByIdForPhylogeny( _phylogeny ).get( element
+                    .getAttribute( PhyloXmlMapping.SEQUENCE_RELATION_ID_REF0 ) );
+            if ( ref != null ) {
+                seqRelation.setRef0( ref );
+            }
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_RELATION_ID_REF1 ) && ( _phylogeny != null ) ) {
+            final Sequence ref = PhyloXmlHandler.getSequenceMapByIdForPhylogeny( _phylogeny ).get( element
+                    .getAttribute( PhyloXmlMapping.SEQUENCE_RELATION_ID_REF1 ) );
+            if ( ref != null ) {
+                seqRelation.setRef1( ref );
+            }
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.SEQUENCE_RELATION_DISTANCE ) ) {
+            seqRelation.setDistance( Double
+                    .valueOf( element.getAttribute( PhyloXmlMapping.SEQUENCE_RELATION_DISTANCE ) ) );
+        }
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.getQualifiedName().equals( PhyloXmlMapping.CONFIDENCE ) ) {
+                seqRelation.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( child_element ) );
+            }
+        }
+        return seqRelation;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance( final Phylogeny phylogeny ) {
+        SequenceRelationParser instance = _instances.get( phylogeny );
+        if ( instance == null ) {
+            instance = new SequenceRelationParser();
+            instance._phylogeny = phylogeny;
+            _instances.put( phylogeny, instance );
+        }
+        return instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/TaxonomyParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/TaxonomyParser.java
new file mode 100644 (file)
index 0000000..d124ad0
--- /dev/null
@@ -0,0 +1,87 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.data.Uri;
+
+public class TaxonomyParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new TaxonomyParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private TaxonomyParser() {
+    }
+
+    @Override
+    public Taxonomy parse( final XmlElement element ) throws PhylogenyParserException {
+        final Taxonomy taxonomy = new Taxonomy();
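+        // Only child elements that actually carry a value are mapped; elements with
+        // unknown names are ignored.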
+        for( int i = 0; i < element.getNumberOfChildElements(); ++i ) {
+            final XmlElement child_element = element.getChildElement( i );
+            if ( child_element.isHasValue() ) {
+                if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_CODE ) ) {
+                    taxonomy.setTaxonomyCode( child_element.getValueAsString() );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_COMMON_NAME ) ) {
+                    taxonomy.setCommonName( child_element.getValueAsString() );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_AUTHORITY ) ) {
+                    taxonomy.setAuthority( child_element.getValueAsString() );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_SYNONYM ) ) {
+                    taxonomy.getSynonyms().add( ( child_element.getValueAsString() ) );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.IDENTIFIER ) ) {
+                    taxonomy.setIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( child_element ) );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_RANK ) ) {
+                    taxonomy.setRank( child_element.getValueAsString() );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.TAXONOMY_SCIENTIFIC_NAME ) ) {
+                    taxonomy.setScientificName( child_element.getValueAsString() );
+                }
+                else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) {
+                    taxonomy.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
+                }
+            }
+        }
+        return taxonomy;
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/UriParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/UriParser.java
new file mode 100644 (file)
index 0000000..57c106e
--- /dev/null
@@ -0,0 +1,75 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.phyloxml.data;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Uri;
+
+public class UriParser implements PhylogenyDataPhyloXmlParser {
+
+    private static final PhylogenyDataPhyloXmlParser _instance;
+    static {
+        try {
+            _instance = new UriParser();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private UriParser() {
+    }
+
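+    // The element value is parsed into a java.net.URI; the optional description and
+    // type attributes are copied as-is and default to empty strings when absent.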
+    @Override
+    public PhylogenyData parse( final XmlElement element ) throws PhylogenyParserException {
+        String type = "";
+        String desc = "";
+        URI uri = null;
+        try {
+            uri = new URI( element.getValueAsString() );
+        }
+        catch ( final URISyntaxException e ) {
+            throw new PhylogenyParserException( "ill-formatted URI: " + element.getValueAsString() );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.URI_DESC_ATTR ) ) {
+            desc = element.getAttribute( PhyloXmlMapping.URI_DESC_ATTR );
+        }
+        if ( element.isHasAttribute( PhyloXmlMapping.TYPE_ATTR ) ) {
+            type = element.getAttribute( PhyloXmlMapping.TYPE_ATTR );
+        }
+        return new Uri( uri, desc, type );
+    }
+
+    public static PhylogenyDataPhyloXmlParser getInstance() {
+        return _instance;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/tol/TolParser.java b/forester/java/src/org/forester/io/parsers/tol/TolParser.java
new file mode 100644 (file)
index 0000000..8968885
--- /dev/null
@@ -0,0 +1,286 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.tol;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipInputStream;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.util.ForesterUtil;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class TolParser implements PhylogenyParser {
+
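+    // JAXP and Xerces feature/property URIs used to enable optional XML schema
+    // validation when a schema location has been set via setValidateAgainstSchema().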
+    final public static String JAXP_SCHEMA_LANGUAGE                       = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
+    final public static String W3C_XML_SCHEMA                             = "http://www.w3.org/2001/XMLSchema";
+    final public static String JAXP_SCHEMA_SOURCE                         = "http://java.sun.com/xml/jaxp/properties/schemaSource";
+    final public static String SAX_FEATURES_VALIDATION                    = "http://xml.org/sax/features/validation";
+    final public static String APACHE_FEATURES_VALIDATION_SCHEMA          = "http://apache.org/xml/features/validation/schema";
+    final public static String APACHE_FEATURES_VALIDATION_SCHEMA_FULL     = "http://apache.org/xml/features/validation/schema-full-checking";
+    final public static String APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION = "http://apache.org/xml/properties/schema/external-schemaLocation";
+    private Object             _source;
+    private boolean            _valid;
+    private boolean            _zipped_inputstream;
+    private int                _error_count;
+    private int                _warning_count;
+    private String             _schema_location;
+    private StringBuffer       _error_messages;
+    private StringBuffer       _warning_messages;
+
+    public TolParser() {
+        init();
+        reset();
+    }
+
+    public int getErrorCount() {
+        return _error_count;
+    }
+
+    public StringBuffer getErrorMessages() {
+        return _error_messages;
+    }
+
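+    // Returns a reader for the first non-directory, non-empty entry in the zip file
+    // named by the source, or null if no such entry exists.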
+    private Reader getReaderFromZipFile() throws IOException {
+        Reader reader = null;
+        final ZipFile zip_file = new ZipFile( getSource().toString() );
+        final Enumeration<?> zip_file_entries = zip_file.entries();
+        while ( zip_file_entries.hasMoreElements() ) {
+            final ZipEntry zip_file_entry = ( ZipEntry ) zip_file_entries.nextElement();
+            if ( !zip_file_entry.isDirectory() && ( zip_file_entry.getSize() > 0 ) ) {
+                final InputStream is = zip_file.getInputStream( zip_file_entry );
+                reader = new InputStreamReader( is );
+                break;
+            }
+        }
+        return reader;
+    }
+
+    private String getSchemaLocation() {
+        return _schema_location;
+    }
+
+    private Object getSource() {
+        return _source;
+    }
+
+    public int getWarningCount() {
+        return _warning_count;
+    }
+
+    public StringBuffer getWarningMessages() {
+        return _warning_messages;
+    }
+
+    private void init() {
+        setZippedInputstream( false );
+    }
+
+    public boolean isValid() {
+        return _valid;
+    }
+
+    private boolean isZippedInputstream() {
+        return _zipped_inputstream;
+    }
+
+    public Phylogeny[] parse() throws IOException, PhylogenyParserException {
+        reset();
+        final TolXmlHandler handler = new TolXmlHandler();
+        final SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware( true );
+        try {
+            if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
+                factory.setFeature( SAX_FEATURES_VALIDATION, true );
+                factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA, true );
+                factory.setFeature( APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true );
+            }
+        }
+        catch ( final SAXNotRecognizedException e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "sax not recognized exception: " + e.getMessage() );
+        }
+        catch ( final SAXNotSupportedException e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "sax not supported exception: " + e.getMessage() );
+        }
+        catch ( final ParserConfigurationException e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "parser configuration exception: " + e.getMessage() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "error while configuring sax parser: " + e.getMessage() );
+        }
+        try {
+            final SAXParser parser = factory.newSAXParser();
+            if ( !ForesterUtil.isEmpty( getSchemaLocation() ) ) {
+                parser.setProperty( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA );
+                parser.setProperty( JAXP_SCHEMA_SOURCE, getSchemaLocation() );
+                parser.setProperty( APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation() );
+            }
+            final XMLReader xml_reader = parser.getXMLReader();
+            xml_reader.setContentHandler( handler );
+            xml_reader.setErrorHandler( new TolParserErrorHandler() );
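+            // The source may be a File (plain XML or a .zip archive), an InputSource,
+            // an InputStream (optionally zipped), a String file name, or a StringBuffer
+            // holding the XML itself.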
+            if ( getSource() instanceof File ) {
+                if ( !getSource().toString().toLowerCase().endsWith( ".zip" ) ) {
+                    xml_reader.parse( new InputSource( new FileReader( ( File ) getSource() ) ) );
+                }
+                else {
+                    final Reader reader = getReaderFromZipFile();
+                    if ( reader == null ) {
+                        throw new PhylogenyParserException( "Zip file \"" + getSource()
+                                + "\" appears not to contain any entries" );
+                    }
+                    xml_reader.parse( new InputSource( reader ) );
+                }
+            }
+            else if ( getSource() instanceof InputSource ) {
+                xml_reader.parse( ( InputSource ) getSource() );
+            }
+            else if ( getSource() instanceof InputStream ) {
+                if ( !isZippedInputstream() ) {
+                    final InputStream is = ( InputStream ) getSource();
+                    final Reader reader = new InputStreamReader( is );
+                    xml_reader.parse( new InputSource( reader ) );
+                }
+                else {
+                    final ZipInputStream zip_is = new ZipInputStream( ( InputStream ) getSource() );
+                    // getNextEntry() positions the stream at its first entry; a null
+                    // return means the stream contains no entries at all.
+                    if ( zip_is.getNextEntry() == null ) {
+                        throw new PhylogenyParserException( "Zip input stream \"" + getSource()
+                                + "\" appears not to contain any data" );
+                    }
+                    final Reader reader = new InputStreamReader( zip_is );
+                    xml_reader.parse( new InputSource( reader ) );
+                }
+            }
+            else if ( getSource() instanceof String ) {
+                final File file = new File( getSource().toString() );
+                final Reader reader = new FileReader( file );
+                xml_reader.parse( new InputSource( reader ) );
+            }
+            else if ( getSource() instanceof StringBuffer ) {
+                final StringReader string_reader = new StringReader( getSource().toString() );
+                xml_reader.parse( new InputSource( string_reader ) );
+            }
+            else {
+                throw new PhylogenyParserException( "attempt to parse object of unsupported type: \""
+                        + getSource().getClass() + "\"" );
+            }
+        }
+        catch ( final SAXException sax_exception ) {
+            throw new PhylogenyParserException( "Failed to parse [" + getSource() + "]: " + sax_exception.getMessage() );
+        }
+        catch ( final ParserConfigurationException parser_config_exception ) {
+            throw new PhylogenyParserException( "Failed to parse [" + getSource()
+                    + "] Problem with XML parser configuration: " + parser_config_exception.getMessage() );
+        }
+        catch ( final IOException e ) {
+            throw new PhylogenyParserException( "Problem with input source [" + getSource() + "]: \n" + e.getMessage() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            throw new PhylogenyParserException( "Failed to parse [" + getSource() + "]: " + e.getMessage() );
+        }
+        catch ( final Error err ) {
+            err.printStackTrace();
+            throw new PhylogenyParserException( "Severe error: " + err.getMessage() );
+        }
+        final Phylogeny[] ps = new Phylogeny[ handler.getPhylogenies().size() ];
+        int i = 0;
+        for( final Phylogeny phylogeny : handler.getPhylogenies() ) {
+            ps[ i++ ] = phylogeny;
+        }
+        return ps;
+    }
+
+    private void reset() {
+        _valid = true;
+        _error_count = 0;
+        _warning_count = 0;
+        _error_messages = new StringBuffer();
+        _warning_messages = new StringBuffer();
+    }
+
+    public void setSource( final Object source ) {
+        _source = source;
+    }
+
+    public void setValidateAgainstSchema( final String schema_location ) {
+        _schema_location = schema_location;
+    }
+
+    public void setZippedInputstream( final boolean zipped_inputstream ) {
+        _zipped_inputstream = zipped_inputstream;
+    }
+
+    private class TolParserErrorHandler extends DefaultHandler {
+
+        @Override
+        public void error( final SAXParseException e ) {
+            ++_error_count;
+            _valid = false;
+            throw new RuntimeException( "XML error at line " + e.getLineNumber() + ": \n" + e.getMessage() );
+        }
+
+        @Override
+        public void fatalError( final SAXParseException e ) {
+            ++_error_count;
+            _valid = false;
+            throw new RuntimeException( "Fatal XML error at line " + e.getLineNumber() + ": \n" + e.getMessage() );
+        }
+
+        @Override
+        public void warning( final SAXParseException e ) {
+            ++_warning_count;
+            if ( _warning_messages.length() > 0 ) {
+                _warning_messages.append( ForesterUtil.LINE_SEPARATOR );
+            }
+            _warning_messages.append( "[line: " + e.getLineNumber() + "] " + e.getMessage() );
+        }
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/io/parsers/tol/TolXmlHandler.java b/forester/java/src/org/forester/io/parsers/tol/TolXmlHandler.java
new file mode 100644 (file)
index 0000000..c3b452d
--- /dev/null
@@ -0,0 +1,318 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.tol;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.XmlElement;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.util.FailedConditionCheckException;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public final class TolXmlHandler extends DefaultHandler {
+
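+    // SAX handler which accumulates a temporary XmlElement tree for each TREE/NODE
+    // element and maps it onto Phylogeny/PhylogenyNode objects once the corresponding
+    // end tag is reached.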
+    private String                    _current_element_name;
+    private Phylogeny                 _current_phylogeny;
+    private List<Phylogeny>           _phylogenies;
+    private XmlElement                _current_xml_element;
+    private PhylogenyNode             _current_node;
+    private final static StringBuffer _buffer = new StringBuffer();
+
+    TolXmlHandler() {
+        // Constructor.
+    }
+
+    private void addNode() {
+        final PhylogenyNode new_node = new PhylogenyNode();
+        getCurrentNode().addAsChild( new_node );
+        setCurrentNode( new_node );
+    }
+
+    @Override
+    public void characters( final char[] chars, final int start_index, final int length ) {
+        if ( ( ( getCurrentXmlElement() != null ) && ( getCurrentElementName() != null ) )
+                && !getCurrentElementName().equals( TolXmlMapping.CLADE )
+                && !getCurrentElementName().equals( TolXmlMapping.PHYLOGENY ) ) {
+            getCurrentXmlElement().setValue( new String( chars, start_index, length ).trim() );
+        }
+    }
+
+    @Override
+    public void endElement( final String namespace_uri, final String local_name, final String qualified_name )
+            throws SAXException {
+        if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) {
+            if ( local_name.equals( TolXmlMapping.CLADE ) ) {
+                try {
+                    TolXmlHandler.mapElementToPhylogenyNode( getCurrentXmlElement(), getCurrentNode() );
+                    if ( !getCurrentNode().isRoot() ) {
+                        setCurrentNode( getCurrentNode().getParent() );
+                    }
+                    setCurrentXmlElement( getCurrentXmlElement().getParent() );
+                }
+                catch ( final PhylogenyParserException ex ) {
+                    throw new SAXException( ex.getMessage() );
+                }
+            }
+            else if ( local_name.equals( TolXmlMapping.PHYLOGENY ) ) {
+                try {
+                    TolXmlHandler.mapElementToPhylogeny( getCurrentXmlElement(), getCurrentPhylogeny() );
+                }
+                catch ( final PhylogenyParserException ex ) {
+                    throw new SAXException( ex.getMessage() );
+                }
+                finishPhylogeny();
+                reset();
+            }
+            else if ( ( getCurrentPhylogeny() != null ) && ( getCurrentXmlElement().getParent() != null ) ) {
+                setCurrentXmlElement( getCurrentXmlElement().getParent() );
+            }
+            setCurrentElementName( null );
+        }
+    }
+
+    private void finishPhylogeny() throws SAXException {
+        getCurrentPhylogeny().setRooted( true );
+        getCurrentPhylogeny().recalculateNumberOfExternalDescendants( false );
+        getPhylogenies().add( getCurrentPhylogeny() );
+    }
+
+    private String getCurrentElementName() {
+        return _current_element_name;
+    }
+
+    private PhylogenyNode getCurrentNode() {
+        return _current_node;
+    }
+
+    private Phylogeny getCurrentPhylogeny() {
+        return _current_phylogeny;
+    }
+
+    private XmlElement getCurrentXmlElement() {
+        return _current_xml_element;
+    }
+
+    List<Phylogeny> getPhylogenies() {
+        return _phylogenies;
+    }
+
+    private void init() {
+        reset();
+        setPhylogenies( new ArrayList<Phylogeny>() );
+    }
+
+    private void initCurrentNode() {
+        if ( getCurrentNode() != null ) {
+            throw new FailedConditionCheckException( "attempt to create new current node when current node already exists" );
+        }
+        if ( getCurrentPhylogeny() == null ) {
+            throw new FailedConditionCheckException( "attempt to create new current node for non-existing phylogeny" );
+        }
+        final PhylogenyNode node = new PhylogenyNode();
+        getCurrentPhylogeny().setRoot( node );
+        setCurrentNode( getCurrentPhylogeny().getRoot() );
+    }
+
+    private void newClade() {
+        if ( getCurrentNode() == null ) {
+            initCurrentNode();
+        }
+        else {
+            addNode();
+        }
+    }
+
+    private void newPhylogeny() {
+        setCurrentPhylogeny( new Phylogeny() );
+    }
+
+    private void reset() {
+        setCurrentPhylogeny( null );
+        setCurrentNode( null );
+        setCurrentElementName( null );
+        setCurrentXmlElement( null );
+    }
+
+    private void setCurrentElementName( final String element_name ) {
+        _current_element_name = element_name;
+    }
+
+    private void setCurrentNode( final PhylogenyNode current_node ) {
+        _current_node = current_node;
+    }
+
+    private void setCurrentPhylogeny( final Phylogeny phylogeny ) {
+        _current_phylogeny = phylogeny;
+    }
+
+    private void setCurrentXmlElement( final XmlElement element ) {
+        _current_xml_element = element;
+    }
+
+    private void setPhylogenies( final List<Phylogeny> phylogenies ) {
+        _phylogenies = phylogenies;
+    }
+
+    @Override
+    public void startDocument() throws SAXException {
+        init();
+    }
+
+    @Override
+    public void startElement( final String namespace_uri,
+                              final String local_name,
+                              final String qualified_name,
+                              final Attributes attributes ) throws SAXException {
+        setCurrentElementName( local_name );
+        if ( local_name.equals( TolXmlMapping.CLADE ) ) {
+            final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
+            getCurrentXmlElement().addChildElement( element );
+            setCurrentXmlElement( element );
+            newClade();
+        }
+        else if ( local_name.equals( TolXmlMapping.PHYLOGENY ) ) {
+            setCurrentXmlElement( new XmlElement( "", "", "", null ) );
+            newPhylogeny();
+        }
+        else if ( getCurrentPhylogeny() != null ) {
+            final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
+            getCurrentXmlElement().addChildElement( element );
+            setCurrentXmlElement( element );
+        }
+    }
+
+    public static boolean attributeEqualsValue( final XmlElement element,
+                                                final String attributeName,
+                                                final String attributeValue ) {
+        final String attr = element.getAttribute( attributeName );
+        return ( ( attr != null ) && attr.equals( attributeValue ) );
+    }
+
+    public static String getAtttributeValue( final XmlElement element, final String attributeName ) {
+        final String attr = element.getAttribute( attributeName );
+        if ( attr != null ) {
+            return attr;
+        }
+        else {
+            return "";
+        }
+    }
+
+    private static void mapElementToPhylogeny( final XmlElement xml_element, final Phylogeny phylogeny )
+            throws PhylogenyParserException {
+        // Not needed for now.
+    }
+
+    private static void mapElementToPhylogenyNode( final XmlElement xml_element, final PhylogenyNode node )
+            throws PhylogenyParserException {
+        if ( xml_element.isHasAttribute( TolXmlMapping.NODE_ID_ATTR ) ) {
+            final String id = xml_element.getAttribute( TolXmlMapping.NODE_ID_ATTR );
+            if ( !ForesterUtil.isEmpty( id ) ) {
+                if ( !node.getNodeData().isHasTaxonomy() ) {
+                    node.getNodeData().setTaxonomy( new Taxonomy() );
+                }
+                node.getNodeData().getTaxonomy()
+                        .setIdentifier( new Identifier( id, TolXmlMapping.TOL_TAXONOMY_ID_TYPE ) );
+            }
+        }
+        final boolean put_into_scientific_name = true; // Always put into scientific name.
+        //        if ( xml_element.isHasAttribute( TolXmlMapping.NODE_ITALICIZENAME_ATTR ) ) {
+        //            final String ital = xml_element.getAttribute( TolXmlMapping.NODE_ITALICIZENAME_ATTR );
+        //            if ( !ForesterUtil.isEmpty( ital ) && ital.equals( "1" ) ) {
+        //                put_into_scientific_name = true;
+        //            }
+        //        }
+        for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) {
+            final XmlElement element = xml_element.getChildElement( i );
+            final String qualified_name = element.getQualifiedName();
+            if ( qualified_name.equals( TolXmlMapping.TAXONOMY_NAME ) ) {
+                final String name = element.getValueAsString();
+                if ( !ForesterUtil.isEmpty( name ) ) {
+                    if ( !node.getNodeData().isHasTaxonomy() ) {
+                        node.getNodeData().setTaxonomy( new Taxonomy() );
+                    }
+                    if ( put_into_scientific_name ) {
+                        node.getNodeData().getTaxonomy().setScientificName( name );
+                    }
+                    else {
+                        node.getNodeData().getTaxonomy().setCommonName( name );
+                    }
+                }
+            }
+            else if ( qualified_name.equals( TolXmlMapping.AUTHORITY ) ) {
+                String auth = element.getValueAsString();
+                if ( !ForesterUtil.isEmpty( auth ) && !auth.equalsIgnoreCase( "null" ) ) {
+                    if ( !node.getNodeData().isHasTaxonomy() ) {
+                        node.getNodeData().setTaxonomy( new Taxonomy() );
+                    }
+                    auth = auth.replaceAll( "&amp;", "&" );
+                    node.getNodeData().getTaxonomy().setAuthority( auth );
+                }
+            }
+            else if ( qualified_name.equals( TolXmlMapping.AUTHDATE ) ) {
+                final String authdate = element.getValueAsString();
+                if ( !ForesterUtil.isEmpty( authdate ) && !authdate.equalsIgnoreCase( "null" ) ) {
+                    if ( node.getNodeData().isHasTaxonomy()
+                            && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getAuthority() ) ) {
+                        _buffer.setLength( 0 );
+                        _buffer.append( node.getNodeData().getTaxonomy().getAuthority() );
+                        _buffer.append( " " );
+                        _buffer.append( authdate );
+                        node.getNodeData().getTaxonomy().setAuthority( _buffer.toString() );
+                    }
+                }
+            }
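+            // OTHERNAMES wraps nested OTHERNAME/NAME elements; every non-empty,
+            // non-"null" name is added to the taxonomy's synonyms.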
+            else if ( qualified_name.equals( TolXmlMapping.OTHERNAMES ) ) {
+                for( int j = 0; j < element.getNumberOfChildElements(); ++j ) {
+                    final XmlElement element_j = element.getChildElement( j );
+                    if ( element_j.getQualifiedName().equals( TolXmlMapping.OTHERNAME ) ) {
+                        for( int z = 0; z < element_j.getNumberOfChildElements(); ++z ) {
+                            final XmlElement element_z = element_j.getChildElement( z );
+                            if ( element_z.getQualifiedName().equals( TolXmlMapping.OTHERNAME_NAME ) ) {
+                                final String syn = element_z.getValueAsString();
+                                if ( !ForesterUtil.isEmpty( syn ) && !syn.equalsIgnoreCase( "null" ) ) {
+                                    if ( !node.getNodeData().isHasTaxonomy() ) {
+                                        node.getNodeData().setTaxonomy( new Taxonomy() );
+                                    }
+                                    node.getNodeData().getTaxonomy().getSynonyms().add( syn );
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/io/parsers/tol/TolXmlMapping.java b/forester/java/src/org/forester/io/parsers/tol/TolXmlMapping.java
new file mode 100644 (file)
index 0000000..bb7732d
--- /dev/null
@@ -0,0 +1,47 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.tol;
+
+public final class TolXmlMapping {
+
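+    // Element and attribute names of the Tree of Life (ToL) XML format, mapped onto
+    // forester's model: a TREE element corresponds to a phylogeny, a NODE to a clade.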
+    public static final String PHYLOGENY               = "TREE";
+    public static final String CLADE                   = "NODE";
+    public static final String AUTHDATE                = "AUTHDATE";
+    public static final String AUTHORITY               = "AUTHORITY";
+    public static final String TAXONOMY_NAME           = "NAME";
+    public static final String OTHERNAMES              = "OTHERNAMES";
+    public static final String OTHERNAME               = "OTHERNAME";
+    public static final String OTHERNAME_NAME          = "NAME";
+    public static final String NODE_ID_ATTR            = "ID";
+    public static final String NODE_ITALICIZENAME_ATTR = "ITALICIZENAME";
+    public static final String TOL_TAXONOMY_ID_TYPE    = "tol";
+
+    private TolXmlMapping() {
+        // Hidden.
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java
new file mode 100644 (file)
index 0000000..09d5b24
--- /dev/null
@@ -0,0 +1,73 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/
+
+package org.forester.io.parsers.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.StringReader;
+
+public final class ParserUtils {
+
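+    // Creates a BufferedReader from a File, a String interpreted as a file name,
+    // an InputStream, or a StringBuffer/StringBuilder holding the data itself;
+    // any other source type is rejected with an IllegalArgumentException.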
+    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
+        BufferedReader reader = null;
+        if ( ( source instanceof File ) || ( source instanceof String ) ) {
+            File f = null;
+            if ( source instanceof File ) {
+                f = ( File ) source;
+            }
+            else {
+                f = new File( ( String ) source );
+            }
+            if ( !f.exists() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
+            }
+            else if ( !f.isFile() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
+            }
+            else if ( !f.canRead() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not readable" );
+            }
+            reader = new BufferedReader( new FileReader( f ) );
+        }
+        else if ( source instanceof InputStream ) {
+            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+        }
+        else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
+            reader = new BufferedReader( new StringReader( source.toString() ) );
+        }
+        else {
+            throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
+                    + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
+        }
+        return reader;
+    }
+}
diff --git a/forester/java/src/org/forester/io/parsers/util/PhylogenyParserException.java b/forester/java/src/org/forester/io/parsers/util/PhylogenyParserException.java
new file mode 100644 (file)
index 0000000..e15472f
--- /dev/null
@@ -0,0 +1,53 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.parsers.util;
+
+import java.io.IOException;
+
+/*
+ * @author Christian Zmasek
+ */
+public class PhylogenyParserException extends IOException {
+
+    /**
+     * Serial version UID for serialization compatibility.
+     */
+    private static final long serialVersionUID = -4810333295377881086L;
+
+    /**
+     * Creates a parser exception without a detail message.
+     */
+    public PhylogenyParserException() {
+        super();
+    }
+
+    /**
+     * @param message the detail message describing the parse error
+     */
+    public PhylogenyParserException( final String message ) {
+        super( message );
+    }
+}
diff --git a/forester/java/src/org/forester/io/writers/PhyloXmlNodeWriter.java b/forester/java/src/org/forester/io/writers/PhyloXmlNodeWriter.java
new file mode 100644 (file)
index 0000000..586a709
--- /dev/null
@@ -0,0 +1,59 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2000-2009 Christian M. Zmasek
+// Copyright (C) 2007-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.writers;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.PhylogenyDataUtil;
+import org.forester.util.ForesterUtil;
+
+public class PhyloXmlNodeWriter {
+
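+    // Writes the phyloXML elements of a single node: its name, its branch length
+    // (rounded to the configured number of digits), and its branch and node data.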
+    public static void toPhyloXml( final Writer w, final PhylogenyNode node, final int level, final String indentation )
+            throws IOException {
+        String ind = "";
+        if ( indentation.length() > 0 ) {
+            ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
+        }
+        if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+            PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.NODE_NAME, node.getName(), indentation );
+        }
+        if ( node.getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+            PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.BRANCH_LENGTH, String.valueOf( ForesterUtil.round( node
+                    .getDistanceToParent(), PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ), indentation );
+        }
+        if ( node.getBranchData() != null ) {
+            node.getBranchData().toPhyloXML( w, level, ind );
+        }
+        if ( node.getNodeData() != null ) {
+            node.getNodeData().toPhyloXML( w, level, ind );
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java
new file mode 100644 (file)
index 0000000..630f057
--- /dev/null
@@ -0,0 +1,761 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.io.writers;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Stack;
+
+import org.forester.io.parsers.nexus.NexusConstants;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.PhylogenyDataUtil;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.phylogeny.iterators.PostOrderStackObject;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+
+public final class PhylogenyWriter {
+
+    public final static boolean         INDENT_PHYLOXML_DEAFULT         = true;
+    public final static String          PHYLO_XML_INTENDATION_BASE      = "  ";
+    public final static String          PHYLO_XML_VERSION_ENCODING_LINE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+    public final static String          PHYLO_XML_NAMESPACE_LINE        = "<phyloxml xmlns:xsi=\""
+                                                                                + ForesterConstants.XML_SCHEMA_INSTANCE
+                                                                                + "\" xsi:schemaLocation=\""
+                                                                                + ForesterConstants.PHYLO_XML_LOCATION
+                                                                                + " "
+                                                                                + ForesterConstants.PHYLO_XML_LOCATION
+                                                                                + "/"
+                                                                                + ForesterConstants.PHYLO_XML_VERSION
+                                                                                + "/" + ForesterConstants.PHYLO_XML_XSD
+                                                                                + "\" " + "xmlns=\""
+                                                                                + ForesterConstants.PHYLO_XML_LOCATION
+                                                                                + "\">";
+    public final static String          PHYLO_XML_END                   = "</phyloxml>";
+    private boolean                     _saw_comma;
+    private StringBuffer                _buffer;
+    private Writer                      _writer;
+    private PhylogenyNode               _root;
+    private boolean                     _has_next;
+    private Stack<PostOrderStackObject> _stack;
+    private boolean                     _simple_nh;
+    private boolean                     _nh_write_distance_to_parent;
+    private boolean                     _indent_phyloxml;
+    private int                         _node_level;
+    private int                         _phyloxml_level;
+    private FORMAT                      _format;
+
+    public PhylogenyWriter() {
+        setIndentPhyloxml( INDENT_PHYLOXML_DEAFULT );
+    }
+
+    private void appendPhylogenyLevelPhyloXml( final Writer writer, final Phylogeny tree ) throws IOException {
+        final String indentation = "";
+        if ( !ForesterUtil.isEmpty( tree.getName() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.PHYLOGENY_NAME, tree.getName(), indentation );
+        }
+        if ( tree.getIdentifier() != null ) {
+            if ( ForesterUtil.isEmpty( tree.getIdentifier().getProvider() ) ) {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.IDENTIFIER,
+                                                 tree.getIdentifier().getValue(),
+                                                 indentation );
+            }
+            else {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.IDENTIFIER,
+                                                 tree.getIdentifier().getValue(),
+                                                 PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR,
+                                                 tree.getIdentifier().getProvider(),
+                                                 indentation );
+            }
+        }
+        if ( !ForesterUtil.isEmpty( tree.getDescription() ) ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.PHYLOGENY_DESCRIPTION,
+                                             tree.getDescription(),
+                                             indentation );
+        }
+        if ( tree.getConfidence() != null ) {
+            if ( ForesterUtil.isEmpty( tree.getConfidence().getType() ) ) {
+                PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.CONFIDENCE, tree.getConfidence().getValue()
+                        + "", indentation );
+            }
+            else {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.CONFIDENCE,
+                                                 tree.getConfidence().getValue() + "",
+                                                 PhyloXmlMapping.CONFIDENCE_TYPE_ATTR,
+                                                 tree.getConfidence().getType(),
+                                                 indentation );
+            }
+        }
+    }
+
+    private StringBuffer createIndentation() {
+        if ( !isIndentPhyloxml() ) {
+            return null;
+        }
+        final StringBuffer sb = new StringBuffer( getNodeLevel() * 2 );
+        for( int i = 0; i < getNodeLevel(); ++i ) {
+            sb.append( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+        }
+        return sb;
+    }
+
+    private void decreaseNodeLevel() {
+        --_node_level;
+    }
+
+    private StringBuffer getBuffer() {
+        return _buffer;
+    }
+
+    private int getNodeLevel() {
+        return _node_level;
+    }
+
+    private StringBuffer getOutput( final Phylogeny tree ) throws IOException {
+        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+            throw new RuntimeException( "method inappropriately called" );
+        }
+        if ( tree != null ) {
+            reset( tree );
+            while ( isHasNext() ) {
+                next();
+            }
+            if ( getOutputFormt() == FORMAT.NH ) {
+                getBuffer().append( ';' );
+            }
+            return getBuffer();
+        }
+        else {
+            return new StringBuffer( 0 );
+        }
+    }
+
+    private FORMAT getOutputFormt() {
+        return _format;
+    }
+
+    private int getPhyloXmlLevel() {
+        return _phyloxml_level;
+    }
+
+    private PhylogenyNode getRoot() {
+        return _root;
+    }
+
+    private Stack<PostOrderStackObject> getStack() {
+        return _stack;
+    }
+
+    private Writer getWriter() {
+        return _writer;
+    }
+
+    private void increaseNodeLevel() {
+        ++_node_level;
+    }
+
+    private boolean isHasNext() {
+        return _has_next;
+    }
+
+    private boolean isIndentPhyloxml() {
+        return _indent_phyloxml;
+    }
+
+    private boolean isSawComma() {
+        return _saw_comma;
+    }
+
+    private boolean isSimpleNH() {
+        return _simple_nh;
+    }
+
+    private boolean isWriteDistanceToParentInNH() {
+        return _nh_write_distance_to_parent;
+    }
+
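+    // Iterative post-order style traversal: each PostOrderStackObject records how many
+    // of a node's children have been handled so far ("phase"), so clades are opened on
+    // the way down and written/closed once all descendants have been processed.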
+    private void next() throws IOException {
+        while ( true ) {
+            final PostOrderStackObject si = getStack().pop();
+            final PhylogenyNode node = si.getNode();
+            final int phase = si.getPhase();
+            if ( phase > node.getNumberOfDescendants() ) {
+                setHasNext( node != getRoot() );
+                if ( ( getOutputFormt() != FORMAT.PHYLO_XML ) || node.isExternal() ) {
+                    if ( !node.isRoot() && node.isFirstChildNode() ) {
+                        increaseNodeLevel();
+                    }
+                    if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+                        writeNode( node, createIndentation() );
+                    }
+                    else {
+                        writeNode( node, null );
+                    }
+                }
+                if ( !node.isRoot() ) {
+                    if ( !node.isLastChildNode() ) {
+                        writeCladeSeparator();
+                    }
+                    else {
+                        writeCloseClade();
+                    }
+                }
+                return;
+            }
+            else {
+                getStack().push( new PostOrderStackObject( node, ( phase + 1 ) ) );
+                if ( node.isInternal() ) {
+                    getStack().push( new PostOrderStackObject( node.getChildNode( phase - 1 ), 1 ) );
+                    writeOpenClade( node );
+                    if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+                        if ( phase == 1 ) {
+                            writeNode( node, createIndentation() );
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    private void reset( final Phylogeny tree ) {
+        setBuffer( new StringBuffer() );
+        setWriter( null );
+        setSawComma( false );
+        setHasNext( true );
+        setRoot( tree.getRoot() );
+        setStack( new Stack<PostOrderStackObject>() );
+        getStack().push( new PostOrderStackObject( tree.getRoot(), 1 ) );
+        setNodeLevel( 1 );
+    }
+
+    private void reset( final Writer writer, final Phylogeny tree ) {
+        setBuffer( null );
+        setWriter( writer );
+        setSawComma( false );
+        setHasNext( true );
+        setRoot( tree.getRoot() );
+        setStack( new Stack<PostOrderStackObject>() );
+        getStack().push( new PostOrderStackObject( tree.getRoot(), 1 ) );
+        setNodeLevel( 1 );
+    }
+
+    private void setBuffer( final StringBuffer buffer ) {
+        _buffer = buffer;
+    }
+
+    private void setHasNext( final boolean has_next ) {
+        _has_next = has_next;
+    }
+
+    public void setIndentPhyloxml( final boolean indent_phyloxml ) {
+        _indent_phyloxml = indent_phyloxml;
+    }
+
+    private void setNodeLevel( final int level ) {
+        _node_level = level;
+    }
+
+    private void setOutputFormt( final FORMAT format ) {
+        _format = format;
+    }
+
+    private void setPhyloXmlLevel( final int phyloxml_level ) {
+        _phyloxml_level = phyloxml_level;
+    }
+
+    private void setRoot( final PhylogenyNode root ) {
+        _root = root;
+    }
+
+    private void setSawComma( final boolean saw_comma ) {
+        _saw_comma = saw_comma;
+    }
+
+    private void setSimpleNH( final boolean simple_nh ) {
+        _simple_nh = simple_nh;
+    }
+
+    private void setStack( final Stack<PostOrderStackObject> stack ) {
+        _stack = stack;
+    }
+
+    private void setWriteDistanceToParentInNH( final boolean nh_write_distance_to_parent ) {
+        _nh_write_distance_to_parent = nh_write_distance_to_parent;
+    }
+
+    private void setWriter( final Writer writer ) {
+        _writer = writer;
+    }
+
+    public void toNewHampshire( final List<Phylogeny> trees,
+                                final boolean simple_nh,
+                                final boolean write_distance_to_parent,
+                                final File out_file,
+                                final String separator ) throws IOException {
+        final Iterator<Phylogeny> it = trees.iterator();
+        final StringBuffer sb = new StringBuffer();
+        while ( it.hasNext() ) {
+            sb.append( toNewHampshire( it.next(), simple_nh, write_distance_to_parent ) );
+            sb.append( separator );
+        }
+        writeToFile( sb, out_file );
+    }
+
+    public StringBuffer toNewHampshire( final Phylogeny tree,
+                                        final boolean simple_nh,
+                                        final boolean nh_write_distance_to_parent ) throws IOException {
+        setOutputFormt( FORMAT.NH );
+        setSimpleNH( simple_nh );
+        setWriteDistanceToParentInNH( nh_write_distance_to_parent );
+        return getOutput( tree );
+    }
+
+    public void toNewHampshire( final Phylogeny tree,
+                                final boolean simple_nh,
+                                final boolean write_distance_to_parent,
+                                final File out_file ) throws IOException {
+        writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent ), out_file );
+    }
+
+    public void toNewHampshire( final Phylogeny[] trees,
+                                final boolean simple_nh,
+                                final boolean write_distance_to_parent,
+                                final File out_file,
+                                final String separator ) throws IOException {
+        final StringBuffer sb = new StringBuffer();
+        for( final Phylogeny element : trees ) {
+            sb.append( toNewHampshire( element, simple_nh, write_distance_to_parent ) );
+            sb.append( separator );
+        }
+        writeToFile( sb, out_file );
+    }
+
+    public void toNewHampshireX( final List<Phylogeny> trees, final File out_file, final String separator )
+            throws IOException {
+        final Iterator<Phylogeny> it = trees.iterator();
+        final StringBuffer sb = new StringBuffer();
+        while ( it.hasNext() ) {
+            sb.append( toNewHampshireX( it.next() ) );
+            sb.append( separator );
+        }
+        writeToFile( sb, out_file );
+    }
+
+    public StringBuffer toNewHampshireX( final Phylogeny tree ) throws IOException {
+        setOutputFormt( FORMAT.NHX );
+        return getOutput( tree );
+    }
+
+    public void toNewHampshireX( final Phylogeny tree, final File out_file ) throws IOException {
+        writeToFile( toNewHampshireX( tree ), out_file );
+    }
+
+    public void toNewHampshireX( final Phylogeny[] trees, final File out_file, final String separator )
+            throws IOException {
+        final StringBuffer sb = new StringBuffer();
+        for( final Phylogeny element : trees ) {
+            sb.append( toNewHampshireX( element ) );
+            sb.append( separator );
+        }
+        writeToFile( sb, out_file );
+    }
+
+    public void toNexus( final File out_file, final List<Phylogeny> trees ) throws IOException {
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        writeNexusStart( writer );
+        writeNexusTaxaBlock( writer, trees.get( 0 ) );
+        writeNexusTreesBlock( writer, trees );
+        writer.flush();
+        writer.close();
+    }
+
+    public void toNexus( final File out_file, final Phylogeny tree ) throws IOException {
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final List<Phylogeny> trees = new ArrayList<Phylogeny>( 1 );
+        trees.add( tree );
+        writeNexusStart( writer );
+        writeNexusTaxaBlock( writer, tree );
+        writeNexusTreesBlock( writer, trees );
+        writer.flush();
+        writer.close();
+    }
+
+    public StringBuffer toNexus( final Phylogeny tree ) throws IOException {
+        final StringWriter string_writer = new StringWriter();
+        final Writer writer = new BufferedWriter( string_writer );
+        final List<Phylogeny> trees = new ArrayList<Phylogeny>( 1 );
+        trees.add( tree );
+        writeNexusStart( writer );
+        writeNexusTaxaBlock( writer, tree );
+        writeNexusTreesBlock( writer, trees );
+        writer.flush();
+        writer.close();
+        return string_writer.getBuffer();
+    }
+
+    public void toPhyloXML( final File out_file,
+                            final List<Phylogeny> trees,
+                            final int phyloxml_level,
+                            final String separator ) throws IOException {
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        toPhyloXML( writer, trees, phyloxml_level, separator );
+        writer.flush();
+        writer.close();
+    }
+
+    public void toPhyloXML( final File out_file, final Phylogeny tree, final int phyloxml_level ) throws IOException {
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        writePhyloXmlStart( writer );
+        toPhyloXMLNoPhyloXmlSource( writer, tree, phyloxml_level );
+        writePhyloXmlEnd( writer );
+        writer.flush();
+        writer.close();
+    }
+
+    public StringBuffer toPhyloXML( final Phylogeny tree, final int phyloxml_level ) throws IOException {
+        final StringWriter string_writer = new StringWriter();
+        final Writer writer = new BufferedWriter( string_writer );
+        setPhyloXmlLevel( phyloxml_level );
+        setOutputFormt( FORMAT.PHYLO_XML );
+        writePhyloXmlStart( writer );
+        writeOutput( writer, tree );
+        writePhyloXmlEnd( writer );
+        writer.flush();
+        writer.close();
+        return string_writer.getBuffer();
+    }
+
+    public void toPhyloXML( final Phylogeny[] trees,
+                            final int phyloxml_level,
+                            final File out_file,
+                            final String separator ) throws IOException {
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        toPhyloXML( writer, trees, phyloxml_level, separator );
+        writer.flush();
+        writer.close();
+    }
+
+    public void toPhyloXML( final Writer writer,
+                            final List<Phylogeny> trees,
+                            final int phyloxml_level,
+                            final String separator ) throws IOException {
+        writePhyloXmlStart( writer );
+        final Iterator<Phylogeny> it = trees.iterator();
+        while ( it.hasNext() ) {
+            toPhyloXMLNoPhyloXmlSource( writer, it.next(), phyloxml_level );
+            writer.write( separator );
+        }
+        writePhyloXmlEnd( writer );
+    }
+
+    public void toPhyloXML( final Writer writer, final Phylogeny tree, final int phyloxml_level ) throws IOException {
+        setPhyloXmlLevel( phyloxml_level );
+        setOutputFormt( FORMAT.PHYLO_XML );
+        writePhyloXmlStart( writer );
+        writeOutput( writer, tree );
+        writePhyloXmlEnd( writer );
+    }
+
+    public void toPhyloXML( final Writer writer,
+                            final Phylogeny[] trees,
+                            final int phyloxml_level,
+                            final String separator ) throws IOException {
+        writePhyloXmlStart( writer );
+        for( final Phylogeny phylogeny : trees ) {
+            toPhyloXMLNoPhyloXmlSource( writer, phylogeny, phyloxml_level );
+            writer.write( separator );
+        }
+        writePhyloXmlEnd( writer );
+    }
+
+    private void toPhyloXMLNoPhyloXmlSource( final Writer writer, final Phylogeny tree, final int phyloxml_level )
+            throws IOException {
+        setPhyloXmlLevel( phyloxml_level );
+        setOutputFormt( FORMAT.PHYLO_XML );
+        writeOutput( writer, tree );
+    }
+
+    private void writeCladeSeparator() {
+        setSawComma( true );
+        if ( ( getOutputFormt() == FORMAT.NHX ) || ( getOutputFormt() == FORMAT.NH ) ) {
+            getBuffer().append( "," );
+        }
+    }
+
+    private void writeCloseClade() throws IOException {
+        decreaseNodeLevel();
+        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+            getWriter().write( ForesterUtil.LINE_SEPARATOR );
+            if ( isIndentPhyloxml() ) {
+                getWriter().write( createIndentation().toString() );
+            }
+            PhylogenyDataUtil.appendClose( getWriter(), PhyloXmlMapping.CLADE );
+        }
+        else if ( ( getOutputFormt() == FORMAT.NHX ) || ( getOutputFormt() == FORMAT.NH ) ) {
+            getBuffer().append( ")" );
+        }
+    }
+
+    private void writeNode( final PhylogenyNode node, final StringBuffer indentation ) throws IOException {
+        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+            if ( node.isExternal() ) {
+                getWriter().write( ForesterUtil.LINE_SEPARATOR );
+                if ( indentation != null ) {
+                    getWriter().write( indentation.toString() );
+                }
+                PhylogenyDataUtil.appendOpen( getWriter(), PhyloXmlMapping.CLADE );
+            }
+            if ( indentation != null ) {
+                PhyloXmlNodeWriter.toPhyloXml( getWriter(), node, getPhyloXmlLevel(), indentation.toString() );
+            }
+            else {
+                PhyloXmlNodeWriter.toPhyloXml( getWriter(), node, getPhyloXmlLevel(), "" );
+            }
+            if ( node.isExternal() ) {
+                getWriter().write( ForesterUtil.LINE_SEPARATOR );
+                if ( indentation != null ) {
+                    getWriter().write( indentation.toString() );
+                }
+                PhylogenyDataUtil.appendClose( getWriter(), PhyloXmlMapping.CLADE );
+            }
+        }
+        else if ( getOutputFormt() == FORMAT.NHX ) {
+            getBuffer().append( node.toNewHampshireX() );
+        }
+        else if ( getOutputFormt() == FORMAT.NH ) {
+            getBuffer().append( node.toNewHampshire( isSimpleNH(), isWriteDistanceToParentInNH() ) );
+        }
+    }
+
+    private void writeOpenClade( final PhylogenyNode node ) throws IOException {
+        if ( !isSawComma() ) {
+            if ( !node.isRoot() && node.isFirstChildNode() ) {
+                increaseNodeLevel();
+            }
+            if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+                getWriter().write( ForesterUtil.LINE_SEPARATOR );
+                if ( isIndentPhyloxml() ) {
+                    getWriter().write( createIndentation().toString() );
+                }
+                PhylogenyDataUtil.appendOpen( getWriter(), PhyloXmlMapping.CLADE );
+            }
+            else if ( ( getOutputFormt() == FORMAT.NHX ) || ( getOutputFormt() == FORMAT.NH ) ) {
+                getBuffer().append( "(" );
+            }
+        }
+        setSawComma( false );
+    }
+
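+    // Writes one complete <phylogeny> element: the opening tag with the
+    // rooted, branch-length-unit and type attributes (plus
+    // rerootable="false" where applicable), the phylogeny-level data, all
+    // clades via the traversal in next(), and the closing tag.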
+    private void writeOutput( final Writer writer, final Phylogeny tree ) throws IOException {
+        if ( getOutputFormt() != FORMAT.PHYLO_XML ) {
+            throw new RuntimeException( "method inappropriately called" );
+        }
+        if ( tree != null ) {
+            reset( writer, tree );
+            boolean rerootable = true;
+            String unit = "";
+            String type = "";
+            String rooted = "false";
+            if ( tree.isRooted() ) {
+                rooted = "true";
+            }
+            if ( !tree.isRerootable() ) {
+                rerootable = false;
+            }
+            if ( !ForesterUtil.isEmpty( tree.getDistanceUnit() ) ) {
+                unit = tree.getDistanceUnit();
+            }
+            if ( !ForesterUtil.isEmpty( tree.getType() ) ) {
+                type = tree.getType();
+            }
+            if ( rerootable ) {
+                PhylogenyDataUtil.appendOpen( writer,
+                                              PhyloXmlMapping.PHYLOGENY,
+                                              PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR,
+                                              rooted,
+                                              PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR,
+                                              unit,
+                                              PhyloXmlMapping.PHYLOGENY_TYPE_ATTR,
+                                              type );
+            }
+            else {
+                PhylogenyDataUtil.appendOpen( writer,
+                                              PhyloXmlMapping.PHYLOGENY,
+                                              PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR,
+                                              rooted,
+                                              PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR,
+                                              unit,
+                                              PhyloXmlMapping.PHYLOGENY_TYPE_ATTR,
+                                              type,
+                                              PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR,
+                                              "false" );
+            }
+            appendPhylogenyLevelPhyloXml( writer, tree );
+            while ( isHasNext() ) {
+                next();
+            }
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.PHYLOGENY );
+        }
+    }
+
+    private void writeToFile( final StringBuffer sb, final File out_file ) throws IOException {
+        if ( out_file.exists() ) {
+            throw new IOException( "attempt to overwrite existing file \"" + out_file.getAbsolutePath() + "\"" );
+        }
+        final PrintWriter out = new PrintWriter( new FileWriter( out_file ), true );
+        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+            out.print( PHYLO_XML_VERSION_ENCODING_LINE );
+            out.print( ForesterUtil.LINE_SEPARATOR );
+            out.print( PHYLO_XML_NAMESPACE_LINE );
+            out.print( ForesterUtil.LINE_SEPARATOR );
+        }
+        out.print( sb );
+        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
+            out.print( ForesterUtil.LINE_SEPARATOR );
+            out.print( PHYLO_XML_END );
+        }
+        out.flush();
+        out.close();
+    }
+
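+    /**
+     * Factory method. A minimal usage sketch (the output file name and the
+     * phylogeny instance are illustrative only):
+     *
+     *   final PhylogenyWriter w = PhylogenyWriter.createPhylogenyWriter();
+     *   w.toPhyloXML( new File( "my_tree.xml" ), my_phylogeny, 0 );
+     */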
+    public static PhylogenyWriter createPhylogenyWriter() {
+        return new PhylogenyWriter();
+    }
+
+    private static void writeNexusStart( final Writer writer ) throws IOException {
+        writer.write( NexusConstants.NEXUS );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
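+    // Writes a NEXUS TAXA block (DIMENSIONS NTAX=... and TAXLABELS) for the
+    // external nodes of the given phylogeny; labels are taken from the node
+    // name, the taxonomy, or the sequence name, with spaces replaced by
+    // underscores.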
+    public static void writeNexusTaxaBlock( final Writer writer, final Phylogeny tree ) throws IOException {
+        writer.write( NexusConstants.BEGIN_TAXA );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( " " );
+        writer.write( NexusConstants.DIMENSIONS );
+        writer.write( " " );
+        writer.write( NexusConstants.NTAX );
+        writer.write( "=" );
+        writer.write( String.valueOf( tree.getNumberOfExternalNodes() ) );
+        writer.write( ";" );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( " " );
+        writer.write( NexusConstants.TAXLABELS );
+        for( final PhylogenyNodeIterator it = tree.iteratorExternalForward(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            writer.write( " " );
+            String data = "";
+            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                data = node.getName();
+            }
+            else if ( node.getNodeData().isHasTaxonomy() ) {
+                if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+                    data = node.getNodeData().getTaxonomy().getTaxonomyCode();
+                }
+                else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
+                    data = node.getNodeData().getTaxonomy().getScientificName();
+                }
+                else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) {
+                    data = node.getNodeData().getTaxonomy().getCommonName();
+                }
+                else if ( node.getNodeData().getTaxonomy().getTaxonomyCode() != null ) {
+                    data = node.getNodeData().getTaxonomy().getTaxonomyCode();
+                }
+            }
+            else if ( node.getNodeData().isHasSequence() ) {
+                if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
+                    data = node.getNodeData().getSequence().getName();
+                }
+            }
+            if ( data.length() > 0 ) {
+                data = data.replaceAll( " ", "_" );
+            }
+            writer.write( data );
+        }
+        writer.write( ";" );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( NexusConstants.END );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
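+    // Writes a NEXUS TREES block; each phylogeny is emitted as a New
+    // Hampshire string prefixed with [&R] (rooted) or [&U] (unrooted) and
+    // named after the phylogeny, or "tree1", "tree2", ... if it has no name.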
+    public static void writeNexusTreesBlock( final Writer writer, final List<Phylogeny> trees ) throws IOException {
+        writer.write( NexusConstants.BEGIN_TREES );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        int i = 1;
+        for( final Phylogeny phylogeny : trees ) {
+            writer.write( " " );
+            writer.write( NexusConstants.TREE );
+            writer.write( " " );
+            if ( !ForesterUtil.isEmpty( phylogeny.getName() ) ) {
+                writer.write( "\'" );
+                writer.write( phylogeny.getName() );
+                writer.write( "\'" );
+            }
+            else {
+                writer.write( "tree" );
+                writer.write( String.valueOf( i ) );
+            }
+            writer.write( "=" );
+            if ( phylogeny.isRooted() ) {
+                writer.write( "[&R]" );
+            }
+            else {
+                writer.write( "[&U]" );
+            }
+            writer.write( phylogeny.toNewHampshire( false ) );
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            i++;
+        }
+        writer.write( NexusConstants.END );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
+    private static void writePhyloXmlEnd( final Writer writer ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( PhylogenyWriter.PHYLO_XML_END );
+    }
+
+    private static void writePhyloXmlStart( final Writer writer ) throws IOException {
+        writer.write( PhylogenyWriter.PHYLO_XML_VERSION_ENCODING_LINE );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( PhylogenyWriter.PHYLO_XML_NAMESPACE_LINE );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+    }
+
+    public static enum FORMAT {
+        NH, NHX, PHYLO_XML, NEXUS;
+    }
+}
diff --git a/forester/java/src/org/forester/io/writers/SequenceWriter.java b/forester/java/src/org/forester/io/writers/SequenceWriter.java
new file mode 100644 (file)
index 0000000..b4135d1
--- /dev/null
@@ -0,0 +1,96 @@
+
+package org.forester.io.writers;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.List;
+
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.Sequence;
+import org.forester.util.ForesterUtil;
+
+public class SequenceWriter {
+
+    public static enum SEQ_FORMAT {
+        FASTA;
+    }
+
+    public static void main( final String[] args ) {
+        final Sequence s = BasicSequence.createAaSequence( "name", "abcdefghiiklmnap" );
+        System.out.println( s.toString() );
+        System.out.println( SequenceWriter.toFasta( s, 0 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 5 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 8 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 4 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 3 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 2 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 1 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 100 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 15 ).toString() );
+        System.out.println( SequenceWriter.toFasta( s, 16 ).toString() );
+    }
+
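+    // Renders a sequence in FASTA format, wrapping the residue lines at
+    // 'width' characters; a width below 1 (or at least the sequence length)
+    // yields a single unwrapped line. For example, a width of 4 on a
+    // 10-residue sequence produces residue lines of 4, 4 and 2 characters.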
+    public static StringBuilder toFasta( final Sequence seq, final int width ) {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( ">" );
+        sb.append( seq.getIdentifier().toString() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        if ( ( width < 1 ) || ( width >= seq.getLength() ) ) {
+            sb.append( seq.getMolecularSequence() );
+        }
+        else {
+            final int lines = seq.getLength() / width;
+            final int rest = seq.getLength() - ( lines * width );
+            for( int i = 0; i < lines; ++i ) {
+                sb.append( seq.getMolecularSequence(), i * width, width );
+                if ( i < ( lines - 1 ) ) {
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                }
+            }
+            if ( rest > 0 ) {
+                sb.append( ForesterUtil.LINE_SEPARATOR );
+                sb.append( seq.getMolecularSequence(), lines * width, rest );
+            }
+        }
+        return sb;
+    }
+
+    public static void toFasta( final Sequence seq, final Writer w, final int width ) throws IOException {
+        w.write( ">" );
+        w.write( seq.getIdentifier().toString() );
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        if ( ( width < 1 ) || ( width >= seq.getLength() ) ) {
+            w.write( seq.getMolecularSequence() );
+        }
+        else {
+            final int lines = seq.getLength() / width;
+            final int rest = seq.getLength() - ( lines * width );
+            for( int i = 0; i < lines; ++i ) {
+                w.write( seq.getMolecularSequence(), i * width, width );
+                if ( i < ( lines - 1 ) ) {
+                    w.write( ForesterUtil.LINE_SEPARATOR );
+                }
+            }
+            if ( rest > 0 ) {
+                w.write( ForesterUtil.LINE_SEPARATOR );
+                w.write( seq.getMolecularSequence(), lines * width, rest );
+            }
+        }
+    }
+
+    public static void writeSeqs( final List<Sequence> seqs,
+                                  final Writer writer,
+                                  final SEQ_FORMAT format,
+                                  final int width ) throws IOException {
+        switch ( format ) {
+            case FASTA:
+                for( final Sequence s : seqs ) {
+                    toFasta( s, writer, width );
+                    writer.write( ForesterUtil.LINE_SEPARATOR );
+                }
+                break;
+            default:
+                throw new RuntimeException( "unknown format " + format );
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java
new file mode 100644 (file)
index 0000000..6e407f8
--- /dev/null
@@ -0,0 +1,156 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.List;
+
+import org.forester.sequence.Sequence;
+import org.forester.sequence.Sequence.TYPE;
+import org.forester.util.ForesterUtil;
+
+public class BasicMsa implements Msa {
+
+    private final char[][] _data;
+    private final Object[] _identifiers;
+    private final TYPE     _type;
+
+    public BasicMsa( final int rows, final int columns, final TYPE type ) {
+        if ( ( rows < 1 ) || ( columns < 1 ) ) {
+            throw new IllegalArgumentException( "basic msa of size zero are illegal" );
+        }
+        _data = new char[ rows ][ columns ];
+        _identifiers = new Object[ rows ];
+        _type = type;
+    }
+
+    BasicMsa( final BasicMsa msa ) {
+        _data = msa._data;
+        _identifiers = msa._identifiers;
+        _type = msa._type;
+    }
+
+    private int determineMaxIdLength() {
+        int max = 0;
+        for( int row = 0; row < _data.length; ++row ) {
+            final int l = _identifiers[ row ].toString().length();
+            if ( l > max ) {
+                max = l;
+            }
+        }
+        return max;
+    }
+
+    @Override
+    public Object getIdentifier( final int row ) {
+        return _identifiers[ row ];
+    }
+
+    @Override
+    public int getLength() {
+        return _data[ 0 ].length;
+    }
+
+    @Override
+    public int getNumberOfSequences() {
+        return _identifiers.length;
+    }
+
+    @Override
+    public char getResidueAt( final int row, final int col ) {
+        return _data[ row ][ col ];
+    }
+
+    @Override
+    public StringBuffer getSequenceAsString( final int row ) {
+        final StringBuffer sb = new StringBuffer( _data[ 0 ].length );
+        for( int col = 0; col < _data[ 0 ].length; ++col ) {
+            sb.append( getResidueAt( row, col ) );
+        }
+        return sb;
+    }
+
+    @Override
+    public TYPE getType() {
+        return _type;
+    }
+
+    public void setIdentifier( final int row, final Object id ) {
+        _identifiers[ row ] = id;
+    }
+
+    public void setResidueAt( final int row, final int col, final char residue ) {
+        _data[ row ][ col ] = residue;
+    }
+
+    @Override
+    public String toString() {
+        final int max = determineMaxIdLength() + 1;
+        final StringBuffer sb = new StringBuffer();
+        for( int row = 0; row < _data.length; ++row ) {
+            sb.append( ForesterUtil.pad( _identifiers[ row ].toString(), max, ' ', false ) );
+            for( int col = 0; col < _data[ 0 ].length; ++col ) {
+                sb.append( getResidueAt( row, col ) );
+            }
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        return sb.toString();
+    }
+
+    public void write( final Writer w ) throws IOException {
+        final int max = determineMaxIdLength() + 1;
+        for( int row = 0; row < _data.length; ++row ) {
+            w.write( ForesterUtil.pad( _identifiers[ row ].toString(), max, ' ', false ).toString() );
+            for( int col = 0; col < _data[ 0 ].length; ++col ) {
+                w.write( getResidueAt( row, col ) );
+            }
+            w.write( ForesterUtil.LINE_SEPARATOR );
+        }
+    }
+
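+    // Builds a BasicMsa from a list of pre-aligned sequences; every sequence
+    // must have the same length and type as the first one, otherwise an
+    // IllegalArgumentException is thrown.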
+    public static Msa createInstance( final List<Sequence> seqs ) {
+        if ( seqs.size() < 1 ) {
+            throw new IllegalArgumentException( "cannot create basic msa from less than one sequence" );
+        }
+        final int length = seqs.get( 0 ).getLength();
+        final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() );
+        for( int row = 0; row < seqs.size(); ++row ) {
+            final Sequence seq = seqs.get( row );
+            if ( seq.getLength() != length ) {
+                throw new IllegalArgumentException( "illegal attempt to build msa from sequences of unequal length" );
+            }
+            if ( seq.getType() != msa.getType() ) {
+                throw new IllegalArgumentException( "illegal attempt to build msa from sequences of different type" );
+            }
+            msa.setIdentifier( row, seq.getIdentifier() );
+            for( int col = 0; col < length; ++col ) {
+                msa._data[ row ][ col ] = seq.getResidueAt( col );
+            }
+        }
+        return msa;
+    }
+}
diff --git a/forester/java/src/org/forester/msa/Mafft.java b/forester/java/src/org/forester/msa/Mafft.java
new file mode 100644 (file)
index 0000000..644b5b5
--- /dev/null
@@ -0,0 +1,124 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.FastaParser;
+import org.forester.util.ForesterUtil;
+import org.forester.util.SystemCommandExecutor;
+
+public final class Mafft implements MsaInferrer {
+
+    private final static String DEFAULT_PARAMETERS = "--maxiterate 1000 --localpair";
+    private String              _error;
+    private int                 _exit_code;
+    private final String        _path_to_prg;
+
+    public static MsaInferrer createInstance( final String path_to_prg ) {
+        return new Mafft( path_to_prg );
+    }
+
+    private static String getPathToCmd() {
+        //TODO this needs to come from env variable, etc.
+        String path = "";
+        final String os = ForesterUtil.OS_NAME.toLowerCase();
+        if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) {
+            path = "/usr/local/bin/mafft";
+        }
+        else if ( os.indexOf( "win" ) >= 0 ) {
+            path = "C:\\Program Files\\mafft-win\\mafft.bat";
+        }
+        else {
+            path = "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-6.832-without-extensions/scripts/mafft";
+        }
+        return path;
+    }
+
+    public static boolean isInstalled() {
+        return SystemCommandExecutor.isExecuteableFile( new File( getPathToCmd() ) );
+    }
+
+    public static MsaInferrer createInstance() {
+        return createInstance( getPathToCmd() );
+    }
+
+    private Mafft( final String path_to_prg ) {
+        if ( !SystemCommandExecutor.isExecuteableFile( new File( path_to_prg ) ) ) {
+            throw new IllegalArgumentException( "cannot execute MAFFT via [" + path_to_prg + "]" );
+        }
+        _path_to_prg = new String( path_to_prg );
+        init();
+    }
+
+    public static String getDefaultParameters() {
+        return DEFAULT_PARAMETERS;
+    }
+
+    @Override
+    public Object clone() {
+        throw new NoSuchMethodError();
+    }
+
+    public String getErrorDescription() {
+        return _error;
+    }
+
+    public int getExitCode() {
+        return _exit_code;
+    }
+
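+    // Runs the external MAFFT program on the given input file and parses its
+    // standard output as the resulting alignment. Usage sketch (the input
+    // file name is illustrative only):
+    //
+    //   final MsaInferrer mafft = Mafft.createInstance();
+    //   final List<String> opts =
+    //           Arrays.asList( Mafft.getDefaultParameters().split( "\\s+" ) );
+    //   final Msa msa = mafft.infer( new File( "seqs.fasta" ), opts );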
+    public Msa infer( final File path_to_input_seqs, final List<String> opts ) throws IOException, InterruptedException {
+        init();
+        final List<String> my_opts = new ArrayList<String>();
+        my_opts.add( _path_to_prg );
+        for( int i = 0; i < opts.size(); i++ ) {
+            my_opts.add( opts.get( i ) );
+        }
+        my_opts.add( path_to_input_seqs.getAbsolutePath() );
+        final SystemCommandExecutor commandExecutor = new SystemCommandExecutor( my_opts );
+        // store the exit code in the instance field so that getExitCode() reflects the last run
+        _exit_code = commandExecutor.executeCommand();
+        if ( _exit_code != 0 ) {
+            throw new IOException( "MAFFT failed, exit code: " + _exit_code );
+        }
+        final StringBuilder stdout = commandExecutor.getStandardOutputFromCommand();
+        final StringBuilder stderr = commandExecutor.getStandardErrorFromCommand();
+        System.out.println( stdout );
+        System.out.println();
+        System.out.println( stderr );
+        _error = stderr.toString();
+        final Msa msa = FastaParser.parseMsa( stdout.toString() );
+        return msa;
+    }
+
+    private void init() {
+        _error = null;
+        _exit_code = -100;
+    }
+}
diff --git a/forester/java/src/org/forester/msa/MafftOLD.java b/forester/java/src/org/forester/msa/MafftOLD.java
new file mode 100644 (file)
index 0000000..323d94a
--- /dev/null
@@ -0,0 +1,78 @@
+
+package org.forester.msa;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.List;
+
+import org.forester.io.parsers.FastaParser;
+import org.forester.util.ExternalProgram;
+import org.forester.util.ForesterUtil;
+
+public final class MafftOLD implements MsaInferrer {
+
+    private String       _error;
+    private int          _exit_code;
+    private final String _path_to_prg;
+
+    public static MsaInferrer createInstance( final String path_to_prg ) {
+        return new MafftOLD( path_to_prg );
+    }
+
+    private MafftOLD( final String path_to_prg ) {
+        _path_to_prg = new String( path_to_prg );
+        init();
+    }
+
+    @Override
+    public Object clone() {
+        throw new NoSuchMethodError();
+    }
+
+    public String getErrorDescription() {
+        return _error;
+    }
+
+    public int getExitCode() {
+        return _exit_code;
+    }
+
+    public Msa infer( final File path_to_input_seqs, final List<String> opts ) throws IOException, InterruptedException {
+        init();
+        final String[] my_opts = new String[ opts.size() + 1 ];
+        for( int i = 0; i < opts.size(); i++ ) {
+            my_opts[ i ] = opts.get( i );
+        }
+        my_opts[ opts.size() ] = path_to_input_seqs.getAbsolutePath();
+        final ExternalProgram mafft_prg = new ExternalProgram( _path_to_prg );
+        mafft_prg.launch( my_opts );
+        // _exit_code = mafft_prg.waitFor();
+        // if ( _exit_code != 0 ) {
+        //    throw new IOException( "MAFFT failed, exit code: " + _exit_code );
+        // }
+        final BufferedReader r = new BufferedReader( new InputStreamReader( mafft_prg.getErrorStream() ) );
+        final StringBuffer error_sb = new StringBuffer();
+        String line = null;
+        while ( ( line = r.readLine() ) != null ) {
+            error_sb.append( line );
+            error_sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        r.close();
+        if ( error_sb.length() > 0 ) {
+            _error = error_sb.toString();
+            throw new IOException( "MAFFT failed" );
+        }
+        final InputStream is = mafft_prg.getInputStream();
+        final Msa msa = FastaParser.parseMsa( is );
+        is.close();
+        return msa;
+    }
+
+    private void init() {
+        _error = null;
+        _exit_code = -100;
+    }
+}
diff --git a/forester/java/src/org/forester/msa/Msa.java b/forester/java/src/org/forester/msa/Msa.java
new file mode 100644 (file)
index 0000000..2867989
--- /dev/null
@@ -0,0 +1,52 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.sequence.Sequence.TYPE;
+
+public interface Msa {
+
+    public Object getIdentifier( int row );
+
+    public void setIdentifier( int row, Object identifier );
+
+    public int getLength();
+
+    public int getNumberOfSequences();
+
+    public char getResidueAt( int row, int col );
+
+    public StringBuffer getSequenceAsString( int row );
+
+    public TYPE getType();
+
+    public void setResidueAt( final int row, final int col, final char residue );
+
+    public void write( Writer w ) throws IOException;
+}
diff --git a/forester/java/src/org/forester/msa/MsaFormatException.java b/forester/java/src/org/forester/msa/MsaFormatException.java
new file mode 100644 (file)
index 0000000..949b072
--- /dev/null
@@ -0,0 +1,37 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+import java.io.IOException;
+
+public class MsaFormatException extends IOException {
+
+    private static final long serialVersionUID = 690079849050106491L;
+
+    public MsaFormatException( final String msg ) {
+        super( msg );
+    }
+}
diff --git a/forester/java/src/org/forester/msa/MsaInferrer.java b/forester/java/src/org/forester/msa/MsaInferrer.java
new file mode 100644 (file)
index 0000000..801c247
--- /dev/null
@@ -0,0 +1,39 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+public interface MsaInferrer {
+
+    public String getErrorDescription();
+
+    public int getExitCode();
+
+    public Msa infer( File path_to_input_seqs, List<String> opts ) throws IOException, InterruptedException;
+}
diff --git a/forester/java/src/org/forester/msa/MsaTools.java b/forester/java/src/org/forester/msa/MsaTools.java
new file mode 100644 (file)
index 0000000..a2dfa06
--- /dev/null
@@ -0,0 +1,126 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.Sequence;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+
+public final class MsaTools {
+
+    private ArrayList<String> _ignored_seqs_ids;
+
+    synchronized public ArrayList<String> getIgnoredSequenceIds() {
+        return _ignored_seqs_ids;
+    }
+
+    synchronized public static MsaTools createInstance() {
+        return new MsaTools();
+    }
+
+    private MsaTools() {
+        init();
+    }
+
+    synchronized private void init() {
+        _ignored_seqs_ids = new ArrayList<String>();
+    }
+
+    @Override
+    public Object clone() {
+        throw new NoSuchMethodError();
+    }
+
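+    // Counts how many sequences have a gap character in the given column.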
+    public static int calcGapSumPerColumn( final Msa msa, final int col ) {
+        int gap_rows = 0;
+        for( int j = 0; j < msa.getNumberOfSequences(); ++j ) {
+            if ( msa.getResidueAt( j, col ) == Sequence.GAP ) {
+                gap_rows++;
+            }
+        }
+        return gap_rows;
+    }
+
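+    // Returns a new msa from which every column whose gap fraction exceeds
+    // max_allowed_gap_ratio has been removed; if min_allowed_length > 0,
+    // sequences left with fewer than that many non-gap residues are dropped
+    // and their identifiers recorded (see getIgnoredSequenceIds()). Returns
+    // null if no sequence remains.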
+    synchronized public Msa removeGapColumns( final double max_allowed_gap_ratio,
+                                              final int min_allowed_length,
+                                              final Msa msa ) {
+        init();
+        if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) {
+            throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio );
+        }
+        final boolean ignore_too_short_seqs = min_allowed_length > 0;
+        final boolean[] delete_cols = new boolean[ msa.getLength() ];
+        int new_length = 0;
+        for( int col = 0; col < msa.getLength(); ++col ) {
+            delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio;
+            if ( !delete_cols[ col ] ) {
+                ++new_length;
+            }
+        }
+        final List<Sequence> seqs = new ArrayList<Sequence>( msa.getNumberOfSequences() );
+        for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
+            final char[] mol_seq = new char[ new_length ];
+            int new_col = 0;
+            int non_gap_cols_sum = 0;
+            for( int col = 0; col < msa.getLength(); ++col ) {
+                if ( !delete_cols[ col ] ) {
+                    final char residue = msa.getResidueAt( row, col );
+                    mol_seq[ new_col++ ] = ( residue );
+                    if ( residue != Sequence.GAP ) {
+                        ++non_gap_cols_sum;
+                    }
+                }
+            }
+            if ( ignore_too_short_seqs ) {
+                if ( non_gap_cols_sum >= min_allowed_length ) {
+                    seqs.add( new BasicSequence( msa.getIdentifier( row ), mol_seq, msa.getType() ) );
+                }
+                else {
+                    _ignored_seqs_ids.add( msa.getIdentifier( row ).toString() );
+                }
+            }
+            else {
+                seqs.add( new BasicSequence( msa.getIdentifier( row ), mol_seq, msa.getType() ) );
+            }
+        }
+        if ( seqs.size() < 1 ) {
+            return null;
+        }
+        return BasicMsa.createInstance( seqs );
+    }
+
+    public static DescriptiveStatistics calcBasicGapinessStatistics( final Msa msa ) {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( int i = 0; i < msa.getLength(); ++i ) {
+            stats.addValue( ( double ) calcGapSumPerColumn( msa, i ) / msa.getNumberOfSequences() );
+        }
+        return stats;
+    }
+}
diff --git a/forester/java/src/org/forester/msa/ResampleableMsa.java b/forester/java/src/org/forester/msa/ResampleableMsa.java
new file mode 100644 (file)
index 0000000..7476cad
--- /dev/null
@@ -0,0 +1,57 @@
+// $Id: ResampleableMsa.java,v 1.3 2010/12/13 18:59:48 cmzmasek Exp $
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.msa;
+
+public final class ResampleableMsa extends BasicMsa {
+
+    private int[] _resampled_column_positions = null;
+
+    public ResampleableMsa( final BasicMsa msa ) {
+        super( msa );
+    }
+
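+    // Installs a (bootstrap-style) column resampling: afterwards,
+    // getResidueAt( row, col ) reads column resampled_column_positions[ col ]
+    // of the underlying msa. The array must contain exactly one entry per
+    // original column.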
+    public void resample( final int[] resampled_column_positions ) {
+        if ( resampled_column_positions.length != getLength() ) {
+            _resampled_column_positions = null;
+            throw new IllegalArgumentException( "illegal attempt to use " + resampled_column_positions.length
+                    + " resampled column positions on msa of length " + getLength() );
+        }
+        _resampled_column_positions = resampled_column_positions;
+    }
+
+    @Override
+    public char getResidueAt( final int row, final int col ) {
+        if ( _resampled_column_positions != null ) {
+            return super.getResidueAt( row, _resampled_column_positions[ col ] );
+        }
+        return super.getResidueAt( row, col );
+    }
+
+    @Override
+    public void setResidueAt( final int row, final int col, final char residue ) {
+        throw new NoSuchMethodError( "illegal attempt to set residue in resampleable msa" );
+    }
+}
diff --git a/forester/java/src/org/forester/pccx/BasicExternalNodeBasedCoverageExtender.java b/forester/java/src/org/forester/pccx/BasicExternalNodeBasedCoverageExtender.java
new file mode 100644 (file)
index 0000000..790518a
--- /dev/null
@@ -0,0 +1,178 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public class BasicExternalNodeBasedCoverageExtender implements CoverageExtender {
+
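+    // Greedy step: among all external node names not yet covered, picks the
+    // one whose addition gives the highest coverage score, updates the
+    // per-node score maps for that choice, and optionally reports the
+    // normalized score to the given stream.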
+    private String find( final CoverageCalculationOptions options,
+                         final BranchCountingBasedScoringMethod scoring_method,
+                         final List<SortedMap<PhylogenyNode, Double>> external_node_scores_list,
+                         final List<SortedMap<PhylogenyNode, Double>> external_node_scores_list_temp,
+                         final List<Phylogeny> phylogenies,
+                         final Set<String> already_covered,
+                         final PrintStream out,
+                         final int i,
+                         final double normalization_factor ) {
+        final Phylogeny p = phylogenies.get( 0 );
+        String best_name = null;
+        double best_score = -Double.MAX_VALUE;
+        for( final PhylogenyNodeIterator iter = p.iteratorExternalForward(); iter.hasNext(); ) {
+            final String name = iter.next().getName();
+            if ( !already_covered.contains( name ) ) {
+                final double score = BasicExternalNodeBasedCoverageExtender
+                        .calculateCoverage( phylogenies,
+                                            name,
+                                            options,
+                                            scoring_method,
+                                            external_node_scores_list_temp,
+                                            false );
+                if ( score > best_score ) {
+                    best_score = score;
+                    best_name = name;
+                }
+            }
+        }
+        BasicExternalNodeBasedCoverageExtender.calculateCoverage( phylogenies,
+                                                                  best_name,
+                                                                  options,
+                                                                  scoring_method,
+                                                                  external_node_scores_list_temp,
+                                                                  true );
+        if ( out != null ) {
+            out.println( i + "\t" + best_name + "\t" + ( best_score * normalization_factor ) );
+        }
+        return best_name;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see org.forester.tools.modeling.CoverageExtender#find(java.util.List,
+     *      java.util.List, int,
+     *      org.forester.tools.modeling.CoverageCalculationMethod,
+     *      org.forester.tools.modeling.CoverageCalculationOptions,
+     *      java.io.PrintStream)
+     */
+    public List<String> find( final List<Phylogeny> phylogenies,
+                              final List<String> already_covered,
+                              int number_names_to_find,
+                              final CoverageCalculationOptions options,
+                              final PrintStream out ) {
+        final ExternalNodeBasedCoverageMethodOptions my_options = ( ExternalNodeBasedCoverageMethodOptions ) options;
+        if ( ( my_options == null ) || ForesterUtil.isEmpty( my_options.getScoringMethod() ) ) {
+            throw new IllegalArgumentException( "options for external node based coverage method appear to not have been set" );
+        }
+        BranchCountingBasedScoringMethod scoring_method;
+        try {
+            scoring_method = ( BranchCountingBasedScoringMethod ) ( Class.forName( my_options.getScoringMethod() ) )
+                    .newInstance();
+        }
+        catch ( final Exception e ) {
+            throw new IllegalArgumentException( "could not create scoring method class \""
+                    + my_options.getScoringMethod() + "\"" );
+        }
+        final List<String> best_names = new ArrayList<String>();
+        final Set<String> my_already_covered = new HashSet<String>();
+        final List<SortedMap<PhylogenyNode, Double>> external_node_scores_list = new ArrayList<SortedMap<PhylogenyNode, Double>>();
+        for( int i = 0; i < phylogenies.size(); ++i ) {
+            external_node_scores_list.add( ModelingUtils.setUpExternalCoverageHashMap( phylogenies.get( i ) ) );
+        }
+        if ( already_covered != null ) {
+            for( final String name : already_covered ) {
+                my_already_covered.add( name );
+                BasicExternalNodeBasedCoverageExtender.calculateCoverage( phylogenies,
+                                                                          name,
+                                                                          options,
+                                                                          scoring_method,
+                                                                          external_node_scores_list,
+                                                                          true );
+            }
+        }
+        if ( number_names_to_find < 1 ) {
+            number_names_to_find = phylogenies.get( 0 ).getNumberOfExternalNodes() - my_already_covered.size();
+        }
+        final double normalization_factor = scoring_method.getNormalizationFactor( phylogenies.get( 0 ) );
+        for( int i = 0; i < number_names_to_find; ++i ) {
+            final String name = find( my_options,
+                                      scoring_method,
+                                      external_node_scores_list,
+                                      external_node_scores_list,
+                                      phylogenies,
+                                      my_already_covered,
+                                      out,
+                                      i,
+                                      normalization_factor );
+            my_already_covered.add( name );
+            best_names.add( name );
+        }
+        return best_names;
+    }
+
+    private static double calculateCoverage( final List<Phylogeny> phylogenies,
+                                             final String name,
+                                             final CoverageCalculationOptions options,
+                                             final BranchCountingBasedScoringMethod scoring_method,
+                                             final List<SortedMap<PhylogenyNode, Double>> external_node_scores_list,
+                                             final boolean update_external_node_scores_list ) {
+        int i = 0;
+        double score_sum = 0.0;
+        for( final Object element : phylogenies ) {
+            SortedMap<PhylogenyNode, Double> external_node_scores;
+            if ( update_external_node_scores_list ) {
+                external_node_scores = external_node_scores_list.get( i++ );
+            }
+            else {
+                external_node_scores = new TreeMap<PhylogenyNode, Double>( external_node_scores_list.get( i++ ) );
+            }
+            final Phylogeny phylogeny = ( Phylogeny ) element;
+            scoring_method.calculateScoreForExternalNode( external_node_scores,
+                                                          phylogeny,
+                                                          phylogeny.getNode( name ),
+                                                          options );
+            for( final Object element2 : external_node_scores.values() ) {
+                score_sum += ( ( Double ) element2 ).doubleValue();
+            }
+        }
+        return score_sum / i;
+    }
+}
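BasicExternalNodeBasedCoverageExtender works greedily: at each step it scores every not-yet-covered external node name as if it were added, commits the best-scoring name to the shared score maps, and repeats. A minimal usage sketch follows; the class GreedyExtensionDemo, its package, and the Newick string are illustrative assumptions, while the forester calls mirror the usage exercised in TestPccx further down.

package org.forester.pccx.examples; // hypothetical package, for illustration only

import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.nhx.NHXParser;
import org.forester.pccx.BasicExternalNodeBasedCoverageExtender;
import org.forester.pccx.CoverageExtender;
import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;

public class GreedyExtensionDemo {

    public static void main( final String[] args ) throws Exception {
        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
        final Phylogeny p = factory.create( "((A:0.1,B:0.2):0.1,(C:0.3,D:0.4):0.2)", new NHXParser() )[ 0 ];
        final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>();
        phylogenies.add( p );
        final List<String> already_covered = new ArrayList<String>();
        already_covered.add( "A" ); // pretend "A" is already covered
        final CoverageExtender extender = new BasicExternalNodeBasedCoverageExtender();
        // number_names_to_find < 1 means: rank all remaining external nodes.
        final List<String> best = extender
                .find( phylogenies,
                       already_covered,
                       0,
                       new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ),
                       System.out ); // prints, per step: index, chosen name, normalized score
        System.out.println( "suggested sampling order: " + best );
    }
}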
diff --git a/forester/java/src/org/forester/pccx/BranchCountingBasedScoringMethod.java b/forester/java/src/org/forester/pccx/BranchCountingBasedScoringMethod.java
new file mode 100644 (file)
index 0000000..815d13d
--- /dev/null
@@ -0,0 +1,74 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.util.SortedMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * Scoring method according to an idea by Adam Godzik, PhD.
+ * 
+ * @author Christian M. Zmasek
+ */
+public class BranchCountingBasedScoringMethod implements ScoringMethodForExternalNode {
+
+    double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node,
+                                                      final PhylogenyNode current_node ) {
+        double score_contribution = 0.0;
+        if ( current_node == external_node ) {
+            score_contribution = 1.0;
+        }
+        else {
+            score_contribution = 1.0 / ModelingUtils.calculateBranchSum( external_node, current_node );
+        }
+        return score_contribution;
+    }
+
+    public void calculateScoreForExternalNode( final SortedMap<PhylogenyNode, Double> external_node_scores,
+                                               final Phylogeny phylogeny,
+                                               final PhylogenyNode external_node,
+                                               final CoverageCalculationOptions options ) {
+        for( final Object element : external_node_scores.keySet() ) {
+            final PhylogenyNode current_node = ( PhylogenyNode ) element;
+            final double score_contribution = calculateScoreContributionPerExternalNode( external_node, current_node );
+            final double prev_score_contribution = external_node_scores.get( current_node );
+            if ( score_contribution > prev_score_contribution ) {
+                external_node_scores.put( current_node, score_contribution );
+            }
+        }
+    }
+
+    public String getDesciption() {
+        return "sum of 1/branch-segment-sum";
+    }
+
+    public double getNormalizationFactor( final Phylogeny phylogeny ) {
+        return ( 1.0 / phylogeny.getNumberOfExternalNodes() );
+    }
+}
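The branch-counting score gives the queried external node a contribution of 1.0 to itself and 1/(number of branches on the path between the two nodes) to every other external node; calculateScoreForExternalNode then keeps, per node, the largest contribution seen so far. A small sketch of that arithmetic, with the hypothetical class BranchCountingDemo; the score map is built by hand here only because ModelingUtils.setUpExternalCoverageHashMap is package-private.

package org.forester.pccx.examples; // hypothetical package, for illustration only

import java.util.SortedMap;
import java.util.TreeMap;

import org.forester.io.parsers.nhx.NHXParser;
import org.forester.pccx.BranchCountingBasedScoringMethod;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;

public class BranchCountingDemo {

    public static void main( final String[] args ) throws Exception {
        final Phylogeny p = ParserBasedPhylogenyFactory.getInstance()
                .create( "((A:0.1,B:0.2):0.1,C:0.3)", new NHXParser() )[ 0 ];
        // One entry per external node, initialized to 0.0.
        final SortedMap<PhylogenyNode, Double> scores = new TreeMap<PhylogenyNode, Double>();
        for( final PhylogenyNodeIterator it = p.iteratorExternalForward(); it.hasNext(); ) {
            scores.put( it.next(), 0.0 );
        }
        final BranchCountingBasedScoringMethod method = new BranchCountingBasedScoringMethod();
        // Querying "A": 1.0 for A itself, 1/2 for B (two branches A-parent-B),
        // 1/3 for C (three branches A-parent-root-C). Options are not used by this method.
        method.calculateScoreForExternalNode( scores, p, p.getNode( "A" ), null );
        for( final PhylogenyNode node : scores.keySet() ) {
            System.out.println( node.getName() + "\t" + scores.get( node ) );
        }
    }
}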
diff --git a/forester/java/src/org/forester/pccx/BranchLengthBasedScoringMethod.java b/forester/java/src/org/forester/pccx/BranchLengthBasedScoringMethod.java
new file mode 100644 (file)
index 0000000..7cddfdf
--- /dev/null
@@ -0,0 +1,73 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+/*
+ * 
+ * @author Christian M. Zmasek
+ */
+public class BranchLengthBasedScoringMethod extends BranchCountingBasedScoringMethod {
+
+    public static final double MIN_ALLOWED_BL_VALUE = 0.001;
+
+    @Override
+    double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node,
+                                                      final PhylogenyNode current_node ) {
+        double score_contribution = 0.0;
+        if ( current_node == external_node ) {
+            score_contribution = external_node.getDistanceToParent();
+            // This, of course, is completely /ad hoc/.
+        }
+        else {
+            score_contribution = ModelingUtils.calculateBranchLengthSum( external_node, current_node );
+        }
+        return 1.0 / ( score_contribution > BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ? score_contribution
+                : BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE );
+    }
+
+    @Override
+    public String getDesciption() {
+        return "sum of 1/branch-length-sum [for self: 1/branch-length] [min branch length: "
+                + BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE + "]";
+    }
+
+    @Override
+    public double getNormalizationFactor( final Phylogeny phylogeny ) {
+        double s = 0.0;
+        double d = 0.0;
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorExternalForward(); iter.hasNext(); ) {
+            d = iter.next().getDistanceToParent();
+            s += ( 1.0 / ( d > BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ? d
+                    : BranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ) );
+        }
+        return 1.0 / s;
+    }
+}
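A worked example, using branch lengths that TestPccx exercises further down: for sister leaves A:0.1 and B:0.7, querying A contributes 1/0.1 = 10.0 to A itself and 1/(0.1 + 0.7) = 1.25 to B, with any branch-length sum below MIN_ALLOWED_BL_VALUE clamped to 0.001 before inversion. The normalization factor computed above is

    1 / ( sum over external nodes i of 1 / max(d_i, 0.001) )

where d_i is the branch length of external node i, so a tree in which every external node is covered by itself normalizes to a score of 1.0.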
diff --git a/forester/java/src/org/forester/pccx/Coverage.java b/forester/java/src/org/forester/pccx/Coverage.java
new file mode 100644 (file)
index 0000000..59827f9
--- /dev/null
@@ -0,0 +1,36 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public interface Coverage {
+
+    public String asString();
+
+    public double getScore();
+}
diff --git a/forester/java/src/org/forester/pccx/CoverageCalculationMethod.java b/forester/java/src/org/forester/pccx/CoverageCalculationMethod.java
new file mode 100644 (file)
index 0000000..638ca22
--- /dev/null
@@ -0,0 +1,41 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.util.List;
+
+import org.forester.phylogeny.Phylogeny;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public interface CoverageCalculationMethod {
+
+    public Coverage calculateCoverage( List<Phylogeny> phylogenies,
+                                       List<String> names,
+                                       CoverageCalculationOptions options,
+                                       boolean annotate_phylogenies );
+}
diff --git a/forester/java/src/org/forester/pccx/CoverageCalculationOptions.java b/forester/java/src/org/forester/pccx/CoverageCalculationOptions.java
new file mode 100644 (file)
index 0000000..1d588ff
--- /dev/null
@@ -0,0 +1,34 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public interface CoverageCalculationOptions {
+
+    public String asString();
+}
diff --git a/forester/java/src/org/forester/pccx/CoverageCalculator.java b/forester/java/src/org/forester/pccx/CoverageCalculator.java
new file mode 100644 (file)
index 0000000..8f60542
--- /dev/null
@@ -0,0 +1,63 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.util.List;
+
+import org.forester.phylogeny.Phylogeny;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public class CoverageCalculator {
+
+    private final CoverageCalculationMethod  _method;
+    private final CoverageCalculationOptions _options;
+
+    private CoverageCalculator( final CoverageCalculationMethod method, final CoverageCalculationOptions options ) {
+        _method = method;
+        _options = options;
+    }
+
+    public Coverage calculateCoverage( final List<Phylogeny> phylogenies,
+                                       final List<String> names,
+                                       final boolean annotate_phylogenies ) {
+        return getMethod().calculateCoverage( phylogenies, names, getOptions(), annotate_phylogenies );
+    }
+
+    private CoverageCalculationMethod getMethod() {
+        return _method;
+    }
+
+    private CoverageCalculationOptions getOptions() {
+        return _options;
+    }
+
+    public static CoverageCalculator getInstance( final CoverageCalculationMethod method,
+                                                  final CoverageCalculationOptions options ) {
+        return new CoverageCalculator( method, options );
+    }
+}
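CoverageCalculator is a thin facade: getInstance binds a coverage calculation method to its options, and calculateCoverage simply delegates to that method. A minimal end-to-end sketch, assuming the hypothetical class CoverageDemo and an illustrative four-leaf tree; the API calls follow the pattern used in TestPccx further down.

package org.forester.pccx.examples; // hypothetical package, for illustration only

import java.util.ArrayList;
import java.util.List;

import org.forester.io.parsers.nhx.NHXParser;
import org.forester.pccx.Coverage;
import org.forester.pccx.CoverageCalculator;
import org.forester.pccx.ExternalNodeBasedCoverageMethod;
import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;

public class CoverageDemo {

    public static void main( final String[] args ) throws Exception {
        final Phylogeny p = ParserBasedPhylogenyFactory.getInstance()
                .create( "((A:0.1,B:0.2):0.1,(C:0.3,D:0.4):0.2)", new NHXParser() )[ 0 ];
        final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>();
        phylogenies.add( p );
        final List<String> covered = new ArrayList<String>();
        covered.add( "A" );
        covered.add( "C" );
        final CoverageCalculator cc = CoverageCalculator
                .getInstance( new ExternalNodeBasedCoverageMethod(),
                              new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ) );
        final Coverage cov = cc.calculateCoverage( phylogenies, covered, false );
        System.out.println( cov.asString() );              // formatted report
        System.out.println( "score: " + cov.getScore() );  // normalized; 1.0 would mean complete coverage
    }
}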
diff --git a/forester/java/src/org/forester/pccx/CoverageExtender.java b/forester/java/src/org/forester/pccx/CoverageExtender.java
new file mode 100644 (file)
index 0000000..b882759
--- /dev/null
@@ -0,0 +1,43 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.io.PrintStream;
+import java.util.List;
+
+import org.forester.phylogeny.Phylogeny;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public interface CoverageExtender {
+
+    public abstract List<String> find( final List<Phylogeny> phylogenies,
+                                       final List<String> already_covered,
+                                       int number_names_to_find,
+                                       final CoverageCalculationOptions options,
+                                       final PrintStream out );
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/pccx/ExternalNodeBasedCoverage.java b/forester/java/src/org/forester/pccx/ExternalNodeBasedCoverage.java
new file mode 100644 (file)
index 0000000..ee46c40
--- /dev/null
@@ -0,0 +1,100 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public class ExternalNodeBasedCoverage implements Coverage {
+
+    private final double _av_normalized_score;
+    private final double _av_raw_score;
+    private final int    _n;
+    private final double _sd;
+    private final double _max;
+    private final double _min;
+
+    public ExternalNodeBasedCoverage( final DescriptiveStatistics stats,
+                                      final double average_raw_score,
+                                      final CoverageCalculationOptions options ) {
+        _av_normalized_score = stats.arithmeticMean();
+        _av_raw_score = average_raw_score;
+        _n = stats.getN();
+        if ( _n > 1 ) {
+            _sd = stats.sampleStandardDeviation();
+        }
+        else {
+            _sd = 0.0;
+        }
+        _max = stats.getMax();
+        _min = stats.getMin();
+    }
+
+    public String asString() {
+        final StringBuffer sb = new StringBuffer();
+        if ( getN() == 1 ) {
+            sb.append( "Normalized score: " + getScore() + ForesterUtil.getLineSeparator() );
+            sb.append( "Raw score       : " + getAvarageRawScore() );
+        }
+        else {
+            sb.append( "Average normalized score: " + getScore() + " [sd=" + getSD() + " min=" + getMin() + " max="
+                    + getMax() + " n=" + getN() + "]" + ForesterUtil.getLineSeparator() );
+            sb.append( "Average raw score       : " + getAvarageRawScore() );
+        }
+        return sb.toString();
+    }
+
+    public double getAvarageNormalizedScore() {
+        return _av_normalized_score;
+    }
+
+    public double getAvarageRawScore() {
+        return _av_raw_score;
+    }
+
+    public double getMax() {
+        return _max;
+    }
+
+    public double getMin() {
+        return _min;
+    }
+
+    public int getN() {
+        return _n;
+    }
+
+    public double getScore() {
+        return getAvarageNormalizedScore();
+    }
+
+    public double getSD() {
+        return _sd;
+    }
+}
diff --git a/forester/java/src/org/forester/pccx/ExternalNodeBasedCoverageMethod.java b/forester/java/src/org/forester/pccx/ExternalNodeBasedCoverageMethod.java
new file mode 100644 (file)
index 0000000..5994bb9
--- /dev/null
@@ -0,0 +1,130 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.awt.Color;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public class ExternalNodeBasedCoverageMethod implements CoverageCalculationMethod {
+
+    private static final Color MEAN_COVERAGE_COLOR = new Color( 0, 0, 0 );
+    private static final Color MAXIMAL_COV_COLOR   = new Color( 0, 255, 0 );
+    private static final Color MINIMAL_COV_COLOR   = new Color( 255, 0, 0 );
+
+    public Coverage calculateCoverage( final List<Phylogeny> phylogenies,
+                                       final List<String> names,
+                                       final CoverageCalculationOptions options,
+                                       final boolean annotate_phylogenies ) {
+        final DescriptiveStatistics normalized_score_stats = new BasicDescriptiveStatistics();
+        final DescriptiveStatistics raw_score_stats = new BasicDescriptiveStatistics();
+        final ExternalNodeBasedCoverageMethodOptions my_options = ( ExternalNodeBasedCoverageMethodOptions ) options;
+        if ( ( my_options == null ) || ForesterUtil.isEmpty( my_options.getScoringMethod() ) ) {
+            throw new IllegalArgumentException( "options for external node based coverage method appear to not have been set" );
+        }
+        BranchCountingBasedScoringMethod scoring_method;
+        try {
+            scoring_method = ( BranchCountingBasedScoringMethod ) ( Class.forName( my_options.getScoringMethod() ) )
+                    .newInstance();
+        }
+        catch ( final Exception e ) {
+            throw new IllegalArgumentException( "could not create scoring method class \""
+                    + my_options.getScoringMethod() + "\"" );
+        }
+        final double normalization_factor = scoring_method.getNormalizationFactor( phylogenies.get( 0 ) );
+        for( final Object element : phylogenies ) {
+            final double raw_score = calculateCoverage( ( Phylogeny ) element,
+                                                        names,
+                                                        options,
+                                                        scoring_method,
+                                                        annotate_phylogenies,
+                                                        normalization_factor );
+            normalized_score_stats.addValue( raw_score * normalization_factor );
+            raw_score_stats.addValue( raw_score );
+        }
+        return new ExternalNodeBasedCoverage( normalized_score_stats, raw_score_stats.arithmeticMean(), options );
+    }
+
+    private double calculateCoverage( final Phylogeny phylogeny,
+                                      final List<String> names,
+                                      final CoverageCalculationOptions options,
+                                      final BranchCountingBasedScoringMethod scoring_method,
+                                      final boolean annotate_phylogeny,
+                                      final double normalization_factor ) {
+        final SortedMap<PhylogenyNode, Double> external_node_scores = ModelingUtils
+                .setUpExternalCoverageHashMap( phylogeny );
+        for( final Object element : names ) {
+            scoring_method.calculateScoreForExternalNode( external_node_scores, phylogeny, phylogeny
+                    .getNode( ( String ) element ), options );
+        }
+        if ( annotate_phylogeny ) {
+            colorizePhylogenyAccordingToCoverage( external_node_scores, phylogeny, normalization_factor );
+        }
+        double score = 0.0;
+        for( final Object element : external_node_scores.values() ) {
+            score += ( ( Double ) element ).doubleValue();
+        }
+        return score;
+    }
+
+    private void colorizePhylogenyAccordingToCoverage( final SortedMap<PhylogenyNode, Double> external_node_scores,
+                                                       final Phylogeny phylogeny,
+                                                       final double normalization_factor ) {
+        final DescriptiveStatistics ds = new BasicDescriptiveStatistics();
+        for( final Object element : external_node_scores.entrySet() ) {
+            ds.addValue( ( Double ) ( ( Map.Entry ) element ).getValue() * normalization_factor );
+        }
+        final double min = ds.getMin();
+        final double max = ds.getMax();
+        final double median = ds.median();
+        for( final Object element2 : external_node_scores.entrySet() ) {
+            final Map.Entry element = ( Map.Entry ) element2;
+            final PhylogenyNode node = ( PhylogenyNode ) element.getKey();
+            final double normalized_value = ( Double ) element.getValue() * normalization_factor;
+            PhylogenyMethods.setBranchColorValue( node, ForesterUtil
+                    .calcColor( normalized_value,
+                                min,
+                                max,
+                                median,
+                                ExternalNodeBasedCoverageMethod.MINIMAL_COV_COLOR,
+                                ExternalNodeBasedCoverageMethod.MAXIMAL_COV_COLOR,
+                                ExternalNodeBasedCoverageMethod.MEAN_COVERAGE_COLOR ) );
+        }
+        PhylogenyMethods.postorderBranchColorAveragingExternalNodeBased( phylogeny );
+    }
+}
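When annotate_phylogenies is true, the method additionally colors each branch by its normalized per-node coverage, judging by the constants apparently running from MINIMAL_COV_COLOR (red) through MEAN_COVERAGE_COLOR (black, anchored at the median) to MAXIMAL_COV_COLOR (green), and then averages the colors postorder toward the root. A fragmentary sketch of direct use, assuming phylogenies, a list of covered names, and options prepared as in the CoverageDemo sketch above:

// Assumes: List<Phylogeny> phylogenies, List<String> covered, and a
// CoverageCalculationOptions options, set up as in the CoverageDemo sketch.
final ExternalNodeBasedCoverageMethod method = new ExternalNodeBasedCoverageMethod();
final Coverage cov = method.calculateCoverage( phylogenies, covered, options, true );
// With "true", every phylogeny in the list now carries branch colors reflecting coverage.
System.out.println( cov.asString() );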
diff --git a/forester/java/src/org/forester/pccx/ExternalNodeBasedCoverageMethodOptions.java b/forester/java/src/org/forester/pccx/ExternalNodeBasedCoverageMethodOptions.java
new file mode 100644 (file)
index 0000000..c88c35e
--- /dev/null
@@ -0,0 +1,62 @@
+// $Id:
+// cmzmasek Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+public class ExternalNodeBasedCoverageMethodOptions implements CoverageCalculationOptions {
+
+    final private String _scoring_method;
+
+    /**
+     * This constructor sets the class name for the scoring method, e.g.
+     * "org.forester.pccx.BranchCountingBasedScoringMethod"
+     * 
+     * @param scoring_method
+     *            class name for the scoring method
+     */
+    public ExternalNodeBasedCoverageMethodOptions( final String scoring_method ) {
+        _scoring_method = scoring_method;
+    }
+
+    public String asString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "scoring method: " );
+        BranchCountingBasedScoringMethod scoring_method;
+        try {
+            scoring_method = ( BranchCountingBasedScoringMethod ) ( Class.forName( getScoringMethod() ) ).newInstance();
+        }
+        catch ( final Exception e ) {
+            sb.append( "?" );
+            return sb.toString();
+        }
+        sb.append( scoring_method.getDesciption() );
+        return sb.toString();
+    }
+
+    public String getScoringMethod() {
+        return _scoring_method;
+    }
+}
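The options object only carries the fully qualified class name of a scoring method; ExternalNodeBasedCoverageMethod and BasicExternalNodeBasedCoverageExtender later instantiate that class reflectively and cast it to BranchCountingBasedScoringMethod. A fragmentary sketch of selecting the three scoring methods defined in this package (any other classpath class extending BranchCountingBasedScoringMethod could be named the same way):

final CoverageCalculationOptions by_branch_count =
        new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" );
final CoverageCalculationOptions by_branch_length =
        new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchLengthBasedScoringMethod" );
final CoverageCalculationOptions by_log_branch_length =
        new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.LogBranchLengthBasedScoringMethod" );
System.out.println( by_branch_count.asString() ); // "scoring method: sum of 1/branch-segment-sum"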
diff --git a/forester/java/src/org/forester/pccx/LogBranchLengthBasedScoringMethod.java b/forester/java/src/org/forester/pccx/LogBranchLengthBasedScoringMethod.java
new file mode 100644 (file)
index 0000000..9264e0e
--- /dev/null
@@ -0,0 +1,85 @@
+// $Id:
+// cmzmasek Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+/*
+ * 
+ * @author Christian M. Zmasek
+ */
+public class LogBranchLengthBasedScoringMethod extends BranchCountingBasedScoringMethod {
+
+    public static final double MIN_ALLOWED_BL_VALUE = 0.0001;
+    public static final double MAX_ALLOWED_BL_VALUE = 1.0;
+
+    @Override
+    double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node,
+                                                      final PhylogenyNode current_node ) {
+        double score_contribution = 0.0;
+        if ( current_node == external_node ) {
+            score_contribution = external_node.getDistanceToParent();
+            // This, of course, is completely /ad hoc/.
+        }
+        else {
+            score_contribution = ModelingUtils.calculateBranchLengthSum( external_node, current_node );
+        }
+        if ( score_contribution > LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE ) {
+            score_contribution = LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE;
+        }
+        else if ( score_contribution < LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ) {
+            score_contribution = LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE;
+        }
+        return ( -Math.log( score_contribution ) );
+    }
+
+    @Override
+    public String getDesciption() {
+        return "sum of -ln(branch-length-sum) [for self: -ln(branch-length)] [min branch length: "
+                + LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE + ", max branch length: "
+                + LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE + "]";
+    }
+
+    @Override
+    public double getNormalizationFactor( final Phylogeny phylogeny ) {
+        double s = 0.0;
+        double d = 0.0;
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorExternalForward(); iter.hasNext(); ) {
+            d = iter.next().getDistanceToParent();
+            if ( d > LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE ) {
+                d = LogBranchLengthBasedScoringMethod.MAX_ALLOWED_BL_VALUE;
+            }
+            else if ( d < LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE ) {
+                d = LogBranchLengthBasedScoringMethod.MIN_ALLOWED_BL_VALUE;
+            }
+            s += ( -Math.log( d ) );
+        }
+        return 1 / s;
+    }
+}
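A worked example of the log scoring: a branch-length sum of 0.8 between the queried node and another external node contributes -ln(0.8) ≈ 0.223; sums of 1.0 or more are clamped to MAX_ALLOWED_BL_VALUE and contribute 0.0, and sums at or below MIN_ALLOWED_BL_VALUE contribute -ln(0.0001) ≈ 9.21. Correspondingly, the normalization factor computed above is

    1 / ( sum over external nodes i of -ln( clamp( d_i, 0.0001, 1.0 ) ) )

where d_i is the branch length of external node i.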
diff --git a/forester/java/src/org/forester/pccx/ModelingUtils.java b/forester/java/src/org/forester/pccx/ModelingUtils.java
new file mode 100644 (file)
index 0000000..44eb20f
--- /dev/null
@@ -0,0 +1,81 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public final class ModelingUtils {
+
+    static double calculateBranchLengthSum( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+        final PhylogenyNode lca = PhylogenyMethods.getInstance().obtainLCA( n1, n2 );
+        return ModelingUtils.calculateBranchLengthSumHelper( n1, lca )
+                + ModelingUtils.calculateBranchLengthSumHelper( n2, lca );
+    }
+
+    private static double calculateBranchLengthSumHelper( final PhylogenyNode outer, final PhylogenyNode inner ) {
+        PhylogenyNode my_outer = outer;
+        double l = 0;
+        while ( my_outer != inner ) {
+            if ( my_outer.getDistanceToParent() > 0.0 ) {
+                l += my_outer.getDistanceToParent();
+            }
+            my_outer = my_outer.getParent();
+        }
+        return l;
+    }
+
+    static int calculateBranchSum( final PhylogenyNode n1, final PhylogenyNode n2 ) {
+        final PhylogenyNode lca = PhylogenyMethods.getInstance().obtainLCA( n1, n2 );
+        return ModelingUtils.calculateBranchSumHelper( n1, lca ) + ModelingUtils.calculateBranchSumHelper( n2, lca );
+    }
+
+    private static int calculateBranchSumHelper( final PhylogenyNode outer, final PhylogenyNode inner ) {
+        PhylogenyNode my_outer = outer;
+        int s = 0;
+        while ( my_outer != inner ) {
+            s++;
+            my_outer = my_outer.getParent();
+        }
+        return s;
+    }
+
+    static SortedMap<PhylogenyNode, Double> setUpExternalCoverageHashMap( final Phylogeny phylogeny ) {
+        final SortedMap<PhylogenyNode, Double> external_node_coverage = new TreeMap<PhylogenyNode, Double>();
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorExternalForward(); iter.hasNext(); ) {
+            external_node_coverage.put( iter.next(), 0.0 );
+        }
+        return external_node_coverage;
+    }
+}
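ModelingUtils measures the path between two nodes through their last common ancestor, either as a plain branch count or as a branch-length sum (skipping non-positive lengths), and builds the zero-initialized per-external-node score map used throughout this package. Because all three helpers are package-private, the sketch below pretends to live in org.forester.pccx; the class ModelingUtilsDemo and its tree are illustrative assumptions.

package org.forester.pccx; // same package only because the helpers are package-private

import java.util.SortedMap;

import org.forester.io.parsers.nhx.NHXParser;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;

public class ModelingUtilsDemo {

    public static void main( final String[] args ) throws Exception {
        final Phylogeny p = ParserBasedPhylogenyFactory.getInstance()
                .create( "((A:0.1,B:0.2):0.1,C:0.3)", new NHXParser() )[ 0 ];
        final PhylogenyNode a = p.getNode( "A" );
        final PhylogenyNode c = p.getNode( "C" );
        // Path A -> (A,B) -> root -> C: three branches with lengths 0.1, 0.1 and 0.3.
        System.out.println( ModelingUtils.calculateBranchSum( a, c ) );       // 3
        System.out.println( ModelingUtils.calculateBranchLengthSum( a, c ) ); // ~0.5
        final SortedMap<PhylogenyNode, Double> scores = ModelingUtils.setUpExternalCoverageHashMap( p );
        System.out.println( scores.size() ); // one entry per external node, each mapped to 0.0
    }
}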
diff --git a/forester/java/src/org/forester/pccx/ScoringMethodForExternalNode.java b/forester/java/src/org/forester/pccx/ScoringMethodForExternalNode.java
new file mode 100644 (file)
index 0000000..121bc4e
--- /dev/null
@@ -0,0 +1,80 @@
+// $Id:
+// $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.util.SortedMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * Interface for the scoring methods used by
+ * ExternalNodeBasedCoverageMethod.
+ * 
+ * @author Christian M. Zmasek
+ */
+public interface ScoringMethodForExternalNode {
+
+    /**
+     * This calculates the coverage score for one external node.
+     * 
+     * 
+     * @param external_node_scores
+     *            SortedMap<PhylogenyNode, Double> in which the external node
+     *            scores are stored (node->score)
+     * @param phylogeny
+     *            Phylogeny containing the external nodes to score
+     * @param external_node
+     *            PhylogenyNode for which to calculate the score
+     * @param options
+     *            CoverageCalculationOptions
+     */
+    public void calculateScoreForExternalNode( final SortedMap<PhylogenyNode, Double> external_node_scores,
+                                               final Phylogeny phylogeny,
+                                               final PhylogenyNode external_node,
+                                               final CoverageCalculationOptions options );
+
+    /**
+     * This returns a short description of this scoring method
+     * 
+     * @return short description of this scoring method
+     */
+    public String getDesciption();
+
+    /**
+     * This calculates a normalization factor, so that a normalized score of 1.0
+     * means complete coverage.
+     * 
+     * 
+     * @param phylogeny
+     *            Phylogeny containing the external nodes to score
+     * @return normalization factor
+     */
+    public double getNormalizationFactor( final Phylogeny phylogeny );
+}
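Although ScoringMethodForExternalNode is the declared abstraction, ExternalNodeBasedCoverageMethod and BasicExternalNodeBasedCoverageExtender instantiate the configured class reflectively and cast it to BranchCountingBasedScoringMethod, so in practice a new scoring method extends that class and overrides its package-private per-node contribution hook. A minimal sketch, with the hypothetical class SquaredBranchCountScoringMethod placed in org.forester.pccx so the override compiles:

package org.forester.pccx; // must match, because the overridden hook is package-private

import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;

/*
 * Illustrative only: penalizes distant nodes more strongly than plain branch
 * counting by using 1/(branch count)^2 instead of 1/(branch count).
 */
public class SquaredBranchCountScoringMethod extends BranchCountingBasedScoringMethod {

    @Override
    double calculateScoreContributionPerExternalNode( final PhylogenyNode external_node,
                                                      final PhylogenyNode current_node ) {
        if ( current_node == external_node ) {
            return 1.0;
        }
        final double branches = ModelingUtils.calculateBranchSum( external_node, current_node );
        return 1.0 / ( branches * branches );
    }

    @Override
    public String getDesciption() {
        return "sum of 1/(branch-segment-sum^2)";
    }

    @Override
    public double getNormalizationFactor( final Phylogeny phylogeny ) {
        // A tree in which every external node covers itself normalizes to 1.0.
        return 1.0 / phylogeny.getNumberOfExternalNodes();
    }
}

Such a class could then be selected exactly like the built-in methods, by passing its fully qualified name to ExternalNodeBasedCoverageMethodOptions.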
diff --git a/forester/java/src/org/forester/pccx/TestPccx.java b/forester/java/src/org/forester/pccx/TestPccx.java
new file mode 100644 (file)
index 0000000..d5bdbdc
--- /dev/null
@@ -0,0 +1,246 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.pccx;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public class TestPccx {
+
+    private final static double ZERO_DIFF = 1.0E-6;
+
+    private static boolean isEqual( final double a, final double b ) {
+        return ( ( Math.abs( a - b ) ) < TestPccx.ZERO_DIFF );
+    }
+
+    public static boolean test() {
+        if ( !TestPccx.testExternalNodeBasedCoverage() ) {
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testExternalNodeBasedCoverage() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final String ps1 = "((((A:0.1,B:0.7):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)";
+            final Phylogeny p1 = factory.create( ps1, new NHXParser() )[ 0 ];
+            final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>();
+            final List<String> names = new ArrayList<String>();
+            phylogenies.add( p1 );
+            names.add( "A" );
+            names.add( "A" );
+            final CoverageCalculationOptions options = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" );
+            final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
+                                                                          options );
+            Coverage cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 / 2 + 1.0 / 3 + 1.0 / 4 + 1.0 / 7 + 1.0 / 7 + 1.0 / 7
+                    + 1.0 / 7 + 1.0 / 5 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "B" );
+            names.add( "B" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 / 3 + 1.0 / 4 + 1.0 / 7 + 1.0 / 7 + 1.0 / 7 + 1.0
+                    / 7 + 1.0 / 5 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "G" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx
+                    .isEqual( cov.getScore(),
+                              ( 1.0 + 1.0 + 1.0 / 3 + 1.0 / 4 + 1.0 / 4 + 1.0 / 4 + 1.0 + 1.0 / 2 + 1.0 / 4 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "E" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(),
+                                    ( 1.0 + 1.0 + 1.0 / 3 + 1.0 / 4 + 1.0 + 1.0 / 2 + 1.0 + 1.0 / 2 + 1.0 / 4 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "X" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(),
+                                    ( 1.0 + 1.0 + 1.0 / 3 + 1.0 / 3 + 1.0 + 1.0 / 2 + 1.0 + 1.0 / 2 + 1.0 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "C" );
+            names.add( "C" );
+            names.add( "C" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(),
+                                    ( 1.0 + 1.0 + 1.0 + 1.0 / 3 + 1.0 + 1.0 / 2 + 1.0 + 1.0 / 2 + 1.0 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "D" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx
+                    .isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 / 2 + 1.0 + 1.0 / 2 + 1.0 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "F" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 / 2 + 1.0 ) / 9 ) ) {
+                return false;
+            }
+            names.add( "H" );
+            cov = cc.calculateCoverage( phylogenies, names, false );
+            if ( !TestPccx.isEqual( cov.getScore(), ( 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 ) / 9 ) ) {
+                return false;
+            }
+            final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
+            List<String> l = ce
+                    .find( phylogenies,
+                           null,
+                           0,
+                           new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ),
+                           null );
+            if ( !l.get( 0 ).equals( "X" ) ) {
+                return false;
+            }
+            if ( !l.get( 1 ).equals( "A" ) ) {
+                return false;
+            }
+            if ( !l.get( 2 ).equals( "E" ) ) {
+                return false;
+            }
+            if ( !l.get( 3 ).equals( "G" ) ) {
+                return false;
+            }
+            if ( !l.get( 4 ).equals( "C" ) ) {
+                return false;
+            }
+            if ( !l.get( 5 ).equals( "D" ) ) {
+                return false;
+            }
+            if ( !l.get( 6 ).equals( "B" ) ) {
+                return false;
+            }
+            if ( !l.get( 7 ).equals( "F" ) ) {
+                return false;
+            }
+            if ( !l.get( 8 ).equals( "H" ) ) {
+                return false;
+            }
+            final List<String> already_covered = new ArrayList<String>();
+            already_covered.add( "A" );
+            already_covered.add( "X" );
+            already_covered.add( "H" );
+            already_covered.add( "C" );
+            l = ce
+                    .find( phylogenies,
+                           already_covered,
+                           0,
+                           new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" ),
+                           null );
+            if ( !l.get( 0 ).equals( "E" ) ) {
+                return false;
+            }
+            if ( !l.get( 1 ).equals( "D" ) ) {
+                return false;
+            }
+            if ( !l.get( 2 ).equals( "B" ) ) {
+                return false;
+            }
+            if ( !l.get( 3 ).equals( "F" ) ) {
+                return false;
+            }
+            if ( !l.get( 4 ).equals( "G" ) ) {
+                return false;
+            }
+            final String ps2 = "((((A:0.1,B:0.7):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)";
+            final String ps3 = "((((A:0.1,B:0.1):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)";
+            final String ps4 = "((((A:0.1,B:0.05):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,X:2.0)";
+            final Phylogeny p2 = factory.create( ps2, new NHXParser() )[ 0 ];
+            final Phylogeny p3 = factory.create( ps3, new NHXParser() )[ 0 ];
+            final Phylogeny p4 = factory.create( ps4, new NHXParser() )[ 0 ];
+            final List<Phylogeny> phylogenies2 = new ArrayList<Phylogeny>();
+            final List<String> names2 = new ArrayList<String>();
+            phylogenies2.add( p2 );
+            phylogenies2.add( p3 );
+            phylogenies2.add( p4 );
+            names2.add( "A" );
+            names2.add( "A" );
+            final CoverageCalculationOptions options2 = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchLengthBasedScoringMethod" );
+            final CoverageCalculator cc2 = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
+                                                                           options2 );
+            Coverage cov2 = cc2.calculateCoverage( phylogenies2, names2, false );
+            final double nf = 1 / ( 1 / 0.1 + 1 / 0.7 + 1 / 1.0 + 1 / 1.7 + 1 / 0.3 + 1 / 0.4 + 1 / 0.5 + 1 / 0.6 + 1 / 2.0 );
+            if ( !TestPccx.isEqual( cov2.getScore(), ( 1 / 0.1 + ( 1 / 0.8 + 1 / 0.2 + 1 / 0.15 ) / 3 + 1 / 1.3 + 1
+                    / 4.0 + 1 / 6.4 + 1 / 6.5 + 1 / 6.7 + 1 / 6.8 + 1 / 5.6 )
+                    * nf ) ) {
+                return false;
+            }
+            names2.add( "C" );
+            cov2 = cc2.calculateCoverage( phylogenies2, names2, false );
+            if ( !TestPccx.isEqual( cov2.getScore(), ( 1 / 0.1 + ( 1 / 0.8 + 1 / 0.2 + 1 / 0.15 ) / 3 + 1 / 1.0 + 1
+                    / 4.0 + 1 / 6.4 + 1 / 6.5 + 1 / 6.7 + 1 / 6.8 + 1 / 5.6 )
+                    * nf ) ) {
+                return false;
+            }
+            names2.add( "E" );
+            cov2 = cc2.calculateCoverage( phylogenies2, names2, false );
+            if ( !TestPccx.isEqual( cov2.getScore(), ( 1 / 0.1 + ( 1 / 0.8 + 1 / 0.2 + 1 / 0.15 ) / 3 + 1 / 1.0 + +1
+                    / 4.0 + 1 / 0.3 + 1 / 0.7 + 1 / 3.1 + 1 / 3.2 + 1 / 4.8 )
+                    * nf ) ) {
+                return false;
+            }
+            final CoverageCalculationOptions options_log = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.LogBranchLengthBasedScoringMethod" );
+            final CoverageCalculator cclog = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
+                                                                             options_log );
+            final Coverage cov_log = cclog.calculateCoverage( phylogenies2, names2, false );
+            if ( !TestPccx.isEqual( cov_log.getScore(), 0.8534252108361485 ) ) {
+                return false;
+            }
+            final String ps10 = "((((A:0.1,B:0.7):0.2,C:1.0):2.0,D:1.7):1.3,((E:0.3,F:0.4):1.1,(G:0.5,H:0.6):1.2):1.4,((((I:0.1,J:0.7):0.2,K:1.0):2.0,L:1.7):1.3,((M:0.3,N:0.4,O:0.1,P:0.2):1.1,(Q:0.5,R:0.6):1.2):1.4,S:2.0):2.0)";
+            final Phylogeny p10 = factory.create( ps10, new NHXParser() )[ 0 ];
+            final List<Phylogeny> phylogenies10 = new ArrayList<Phylogeny>();
+            final List<String> names10 = new ArrayList<String>();
+            phylogenies10.add( p10 );
+            names10.add( "A" );
+            names10.add( "B" );
+            names10.add( "N" );
+            names10.add( "O" );
+            final CoverageCalculationOptions options10 = new ExternalNodeBasedCoverageMethodOptions( "org.forester.pccx.BranchCountingBasedScoringMethod" );
+            final CoverageCalculator cc10 = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
+                                                                            options10 );
+            cc10.calculateCoverage( phylogenies10, names10, true );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/Edge.java b/forester/java/src/org/forester/phylogeny/Edge.java
new file mode 100644 (file)
index 0000000..234b2c1
--- /dev/null
@@ -0,0 +1,45 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny;
+
+import org.forester.phylogeny.data.PhylogenyData;
+
+/*
+ * @author Christian Zmasek
+ * 
+ * Interface for edges connecting nodes, for example branches in a phylogenetic
+ * network/tree.
+ */
+public interface Edge {
+
+    public PhylogenyData getData();
+
+    public PhylogenyNode getFirstNode();
+
+    public PhylogenyNode getSecondNode();
+}
\ No newline at end of file
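Note: PhylogenyBranch, added further down in this commit, is the library's concrete Edge implementation. For illustration only, a hypothetical minimal implementation of the interface above could look like the sketch below (SimpleEdge is not part of this commit):

    // Hypothetical minimal Edge implementation (illustration only).
    // Assumes org.forester.phylogeny.PhylogenyNode and org.forester.phylogeny.data.PhylogenyData.
    public class SimpleEdge implements Edge {

        private final PhylogenyNode _first;
        private final PhylogenyNode _second;
        private PhylogenyData       _data; // stays null in this sketch; a setter could be added

        public SimpleEdge( final PhylogenyNode first, final PhylogenyNode second ) {
            _first = first;
            _second = second;
        }

        public PhylogenyData getData() {
            return _data;
        }

        public PhylogenyNode getFirstNode() {
            return _first;
        }

        public PhylogenyNode getSecondNode() {
            return _second;
        }
    }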
diff --git a/forester/java/src/org/forester/phylogeny/Phylogeny.java b/forester/java/src/org/forester/phylogeny/Phylogeny.java
new file mode 100644 (file)
index 0000000..a3277ba
--- /dev/null
@@ -0,0 +1,1335 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Vector;
+
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.phylogeny.data.BranchData;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.SequenceRelation;
+import org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE;
+import org.forester.phylogeny.iterators.ExternalForwardIterator;
+import org.forester.phylogeny.iterators.LevelOrderTreeIterator;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.phylogeny.iterators.PostorderTreeIterator;
+import org.forester.phylogeny.iterators.PreorderTreeIterator;
+import org.forester.util.FailedConditionCheckException;
+import org.forester.util.ForesterUtil;
+
+public class Phylogeny {
+
+    public final static boolean                                 ALLOW_MULTIPLE_PARENTS_DEFAULT = false;
+    private PhylogenyNode                                       _root;
+    private boolean                                             _rooted;
+    private boolean                                             _allow_multiple_parents;
+    private String                                              _name;
+    private String                                              _type;
+    private String                                              _description;
+    private String                                              _distance_unit;
+    private Confidence                                          _confidence;
+    private Identifier                                          _identifier;
+    private boolean                                             _rerootable;
+    private HashMap<Integer, PhylogenyNode>                     _idhash;
+    private List<PhylogenyNode>                                 _external_nodes_set;
+    private Collection<Sequence>                                _sequenceRelationQueries;
+    private Collection<SequenceRelation.SEQUENCE_RELATION_TYPE> _relevant_sequence_relation_types;
+
+    /**
+     * Default Phylogeny constructor. Constructs an empty Phylogeny.
+     */
+    public Phylogeny() {
+        init();
+    }
+
+    /**
+     * Adds this Phylogeny to the list of child nodes of PhylogenyNode parent
+     * and sets the parent of this to parent.
+     * 
+     * @param parent
+     *            the PhylogenyNode to which the root of this Phylogeny is added as a child
+     */
+    public void addAsChild( final PhylogenyNode parent ) {
+        if ( isEmpty() ) {
+            throw new IllegalArgumentException( "Attempt to add an empty tree." );
+        }
+        if ( !isRooted() ) {
+            throw new IllegalArgumentException( "Attempt to add an unrooted tree." );
+        }
+        parent.addAsChild( getRoot() );
+        externalNodesHaveChanged();
+    }
+
+    public void addAsSibling( final PhylogenyNode sibling ) {
+        if ( isEmpty() ) {
+            throw new IllegalArgumentException( "Attempt to add an empty tree." );
+        }
+        if ( !isRooted() ) {
+            throw new IllegalArgumentException( "Attempt to add an unrooted tree." );
+        }
+        final int sibling_index = sibling.getChildNodeIndex();
+        final PhylogenyNode new_node = new PhylogenyNode();
+        final PhylogenyNode sibling_parent = sibling.getParent();
+        new_node.setChild1( sibling );
+        new_node.setChild2( getRoot() );
+        new_node.setParent( sibling_parent );
+        sibling.setParent( new_node );
+        sibling_parent.setChildNode( sibling_index, new_node );
+        final double new_dist = sibling.getDistanceToParent() == PhylogenyNode.DISTANCE_DEFAULT ? PhylogenyNode.DISTANCE_DEFAULT
+                : sibling.getDistanceToParent() / 2;
+        new_node.setDistanceToParent( new_dist );
+        sibling.setDistanceToParent( new_dist );
+        externalNodesHaveChanged();
+    }
+
+    /**
+     * This calculates the height of the subtree emanating at n for rooted,
+     * tree-shaped phylogenies
+     * 
+     * @param n
+     *            the root-node of a subtree
+     * @return the height of the subtree emanating at n
+     */
+    public double calculateSubtreeHeight( final PhylogenyNode n ) {
+        if ( n.isExternal() || n.isCollapse() ) {
+            return ForesterUtil.isLargerOrEqualToZero( n.getDistanceToParent() );
+        }
+        else {
+            double max = -Double.MAX_VALUE;
+            for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
+                final double l = calculateSubtreeHeight( n.getChildNode( i ) );
+                if ( l > max ) {
+                    max = l;
+                }
+            }
+            return max + ForesterUtil.isLargerOrEqualToZero( n.getDistanceToParent() );
+        }
+    }
+
+    /**
+     * Returns a deep copy of this Phylogeny.
+     * <p>
+     * (The resulting Phylogeny has its references in the external nodes
+     * corrected, if they are lacking/obsolete in this.)
+     */
+    public Phylogeny copy() {
+        return copy( _root );
+    }
+
+    /**
+     * Returns a shallow copy of this Phylogeny.
+     * <p>
+     * (The resulting Phylogeny has its references in the external nodes
+     * corrected, if they are lacking/obsolete in this.)
+     */
+    public Phylogeny copyShallow() {
+        return copyShallow( _root );
+    }
+
+    public Phylogeny copyShallow( final PhylogenyNode source ) {
+        final Phylogeny tree = new Phylogeny();
+        if ( isEmpty() ) {
+            tree.init();
+            return tree;
+        }
+        tree._rooted = _rooted;
+        tree._name = _name;
+        tree._description = _description;
+        tree._type = _type;
+        tree._rerootable = _rerootable;
+        tree._distance_unit = _distance_unit;
+        tree._confidence = _confidence;
+        tree._identifier = _identifier;
+        tree.setAllowMultipleParents( isAllowMultipleParents() );
+        tree._root = PhylogenyMethods.copySubTreeShallow( source );
+        return tree;
+    }
+
+    /**
+     * Returns a deep copy of this Phylogeny.
+     * <p>
+     * (The resulting Phylogeny has its references in the external nodes
+     * corrected, if they are lacking/obsolete in this.)
+     */
+    public Phylogeny copy( final PhylogenyNode source ) {
+        final Phylogeny tree = new Phylogeny();
+        if ( isEmpty() ) {
+            tree.init();
+            return tree;
+        }
+        tree._rooted = _rooted;
+        tree._name = new String( _name );
+        tree._description = new String( _description );
+        tree._type = new String( _type );
+        tree._rerootable = _rerootable;
+        tree._distance_unit = new String( _distance_unit );
+        if ( _confidence != null ) {
+            tree._confidence = ( Confidence ) _confidence.copy();
+        }
+        if ( _identifier != null ) {
+            tree._identifier = ( Identifier ) _identifier.copy();
+        }
+        tree.setAllowMultipleParents( isAllowMultipleParents() );
+        tree._root = PhylogenyMethods.copySubTree( source );
+        return tree;
+    }
+
+    /**
+     * Deletes the subtree rooted at remove_us. The caller needs to delete and/or
+     * rehash _idhash (this is not done automatically, to allow multiple deletions
+     * in linear time) and needs to call 'recalculateNumberOfExternalDescendants(boolean)'
+     * afterwards if the tree is to be displayed.
+     * 
+     * @param remove_us the root node of the subtree to be deleted
+     * @param collapse_resulting_node_with_one_desc if true, a resulting internal node
+     *            with only a single remaining descendant is collapsed away (branch lengths are summed)
+     */
+    public void deleteSubtree( final PhylogenyNode remove_us, final boolean collapse_resulting_node_with_one_desc ) {
+        if ( isEmpty() ) {
+            return;
+        }
+        if ( remove_us.isRoot() ) {
+            init();
+            return;
+        }
+        if ( !collapse_resulting_node_with_one_desc ) {
+            remove_us.getParent().removeChildNode( remove_us );
+        }
+        else {
+            final PhylogenyNode removed_node = remove_us;
+            final PhylogenyNode p = remove_us.getParent();
+            if ( p.isRoot() ) {
+                if ( p.getNumberOfDescendants() == 2 ) {
+                    if ( removed_node.isFirstChildNode() ) {
+                        setRoot( getRoot().getChildNode( 1 ) );
+                        getRoot().setParent( null );
+                    }
+                    else {
+                        setRoot( getRoot().getChildNode( 0 ) );
+                        getRoot().setParent( null );
+                    }
+                }
+                else {
+                    p.removeChildNode( removed_node.getChildNodeIndex() );
+                }
+            }
+            else {
+                final PhylogenyNode pp = removed_node.getParent().getParent();
+                if ( p.getNumberOfDescendants() == 2 ) {
+                    final int pi = p.getChildNodeIndex();
+                    if ( removed_node.isFirstChildNode() ) {
+                        p.getChildNode( 1 ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p
+                                .getDistanceToParent(), p.getChildNode( 1 ).getDistanceToParent() ) );
+                        pp.setChildNode( pi, p.getChildNode( 1 ) );
+                    }
+                    else {
+                        p.getChildNode( 0 ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p
+                                .getDistanceToParent(), p.getChildNode( 0 ).getDistanceToParent() ) );
+                        pp.setChildNode( pi, p.getChildNode( 0 ) );
+                    }
+                }
+                else {
+                    p.removeChildNode( removed_node.getChildNodeIndex() );
+                }
+            }
+        }
+        remove_us.setParent( null );
+        setIdHash( null );
+        externalNodesHaveChanged();
+    }
+
+    public void externalNodesHaveChanged() {
+        _external_nodes_set = null;
+    }
+
+    public String[] getAllExternalNodeNames() {
+        int i = 0;
+        if ( isEmpty() ) {
+            return null;
+        }
+        final String[] names = new String[ getNumberOfExternalNodes() ];
+        for( final PhylogenyNodeIterator iter = iteratorExternalForward(); iter.hasNext(); ) {
+            names[ i++ ] = new String( iter.next().getName() );
+        }
+        return names;
+    }
+
+    public Confidence getConfidence() {
+        return _confidence;
+    }
+
+    public String getDescription() {
+        return _description;
+    }
+
+    public String getDistanceUnit() {
+        return _distance_unit;
+    }
+
+    /**
+     * 
+     * Warning: the order of the returned nodes is not defined
+     * and hence cannot be relied on.
+     * 
+     * @return unordered List of external PhylogenyNodes
+     */
+    public List<PhylogenyNode> getExternalNodes() {
+        if ( _external_nodes_set == null ) {
+            _external_nodes_set = new ArrayList<PhylogenyNode>();
+            for( final PhylogenyNodeIterator it = iteratorPostorder(); it.hasNext(); ) {
+                final PhylogenyNode n = it.next();
+                if ( n.isExternal() ) {
+                    _external_nodes_set.add( n );
+                }
+            }
+        }
+        return _external_nodes_set;
+    }
+
+    /**
+     * Returns the number of duplications of this Phylogeny (int). A return
+     * value of -1 indicates that the number of duplications is unknown.
+     */
+    // public int getNumberOfDuplications() {
+    // return _number_of_duplications;
+    // } // getNumberOfDuplications()
+    /**
+     * Sets the number of duplications of this Phylogeny (int). A value of -1
+     * indicates that the number of duplications is unknown.
+     * 
+     * @param clean_nh
+     *            set to true for clean NH format
+     */
+    // public void setNumberOfDuplications( int i ) {
+    // if ( i < 0 ) {
+    // _number_of_duplications = -1;
+    // }
+    // else {
+    // _number_of_duplications = i;
+    // }
+    // } // setNumberOfDuplications( int )
+    /**
+     * Returns the first external PhylogenyNode.
+     */
+    public PhylogenyNode getFirstExternalNode() {
+        if ( isEmpty() ) {
+            throw new FailedConditionCheckException( "attempt to obtain first external node of empty phylogeney" );
+        }
+        PhylogenyNode node = getRoot();
+        while ( node.isInternal() ) {
+            node = node.getFirstChildNode();
+        }
+        return node;
+    }
+
+    /**
+     * This calculates the height for rooted, tree-shaped phylogenies. The
+     * height is the longest distance from the root to an external node. Please
+     * note. Child nodes of collapsed nodes are ignored -- which is useful for
+     * display purposes but might be misleading for other applications.
+     * 
+     * @return the height for rooted, tree-shaped phylogenies
+     */
+    public double getHeight() {
+        if ( isEmpty() ) {
+            return 0.0;
+        }
+        return calculateSubtreeHeight( getRoot() );
+    }
+
+    public Identifier getIdentifier() {
+        return _identifier;
+    }
+
+    // ---------------------------------------------------------
+    // Modification of Phylogeny topology and Phylogeny appearance
+    // ---------------------------------------------------------
+    private HashMap<Integer, PhylogenyNode> getIdHash() {
+        return _idhash;
+    }
+
+    /**
+     * Returns the name of this Phylogeny.
+     */
+    public String getName() {
+        return _name;
+    }
+
+    /**
+     * Finds the PhylogenyNode of this Phylogeny which has a matching ID number.
+     * Takes O(n) time. After method hashIDs() has been called it runs in
+     * constant time.
+     * 
+     * @param id
+     *            ID number (int) of the PhylogenyNode to find
+     * @return PhylogenyNode with matching ID, null if not found
+     */
+    public PhylogenyNode getNode( final int id ) throws NoSuchElementException {
+        if ( isEmpty() ) {
+            throw new NoSuchElementException( "attempt to get node in an empty phylogeny" );
+        }
+        if ( _idhash != null ) {
+            return _idhash.get( id );
+        }
+        else {
+            for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+                final PhylogenyNode node = iter.next();
+                if ( node.getId() == id ) {
+                    return node;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns a PhylogenyNode of this Phylogeny which has a matching name.
+     * Throws an Exception if the name is not present in this Phylogeny or is not unique.
+     * 
+     * @param name
+     *            name (String) of PhylogenyNode to find
+     * @return PhylogenyNode with matching name
+     */
+    public PhylogenyNode getNode( final String name ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = getNodes( name );
+        if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
+            throw new IllegalArgumentException( "node named [" + name + "] not found" );
+        }
+        if ( nodes.size() > 1 ) {
+            throw new IllegalArgumentException( "node named [" + name + "] not unique" );
+        }
+        return nodes.get( 0 );
+    }
+
+    /**
+     * Returns the nodes (from the given sublist) which have a matching taxonomy
+     * identifier. Olivier CHABROL : olivier.chabrol@univ-provence.fr
+     * 
+     * @param taxonomyID
+     *            search taxonomy identifier
+     * @param nodes
+     *            sublist of nodes to search
+     * @return List of nodes with the same taxonomy identifier
+     */
+    private List<PhylogenyNode> getNodeByTaxonomyID( final String taxonomyID, final List<PhylogenyNode> nodes ) {
+        final List<PhylogenyNode> retour = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNode node : nodes ) {
+            if ( taxonomyID.equals( PhylogenyMethods.getTaxonomyIdentifier( node ) ) ) {
+                retour.add( node );
+            }
+        }
+        return retour;
+    }
+
+    /**
+     * Returns a List with references to all Nodes of this Phylogeny which have
+     * a matching name.
+     * 
+     * @param name
+     *            name (String) of Nodes to find
+     * @return List of references to Nodes of this Phylogeny with matching
+     *         names
+     * @see #getNodesWithMatchingSpecies(String)
+     */
+    public List<PhylogenyNode> getNodes( final String name ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.getName().equals( name ) ) {
+                nodes.add( n );
+            }
+        }
+        return nodes;
+    }
+
+    public List<PhylogenyNode> getNodesViaSequenceName( final String seq_name ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
+                nodes.add( n );
+            }
+        }
+        return nodes;
+    }
+
+    public List<PhylogenyNode> getNodesViaTaxonomyCode( final String taxonomy_code ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.getNodeData().isHasTaxonomy()
+                    && n.getNodeData().getTaxonomy().getTaxonomyCode().equals( taxonomy_code ) ) {
+                nodes.add( n );
+            }
+        }
+        return nodes;
+    }
+
+    /**
+     * Returns a List with references to all Nodes of this Phylogeny which
+     * have a matching species name.
+     * 
+     * @param specname
+     *            species name (String) of Nodes to find
+     * @return List of references to Nodes of this Phylogeny with matching
+     *         species names.
+     * @see #getNodes(String)
+     */
+    public List<PhylogenyNode> getNodesWithMatchingSpecies( final String specname ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( PhylogenyMethods.getSpecies( n ).equals( specname ) ) {
+                nodes.add( n );
+            }
+        }
+        return nodes;
+    }
+
+    public PhylogenyNode getNodeViaSequenceName( final String seq_name ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = getNodesViaSequenceName( seq_name );
+        if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
+            throw new IllegalArgumentException( "node with sequence named [" + seq_name + "] not found" );
+        }
+        if ( nodes.size() > 1 ) {
+            throw new IllegalArgumentException( "node with sequence named [" + seq_name + "] not unique" );
+        }
+        return nodes.get( 0 );
+    }
+
+    public PhylogenyNode getNodeViaTaxonomyCode( final String taxonomy_code ) {
+        if ( isEmpty() ) {
+            return null;
+        }
+        final List<PhylogenyNode> nodes = getNodesViaTaxonomyCode( taxonomy_code );
+        if ( ( nodes == null ) || ( nodes.size() < 1 ) ) {
+            throw new IllegalArgumentException( "node with taxonomy code \"" + taxonomy_code + "\" not found" );
+        }
+        if ( nodes.size() > 1 ) {
+            throw new IllegalArgumentException( "node with taxonomy code \"" + taxonomy_code + "\" not unique" );
+        }
+        return nodes.get( 0 );
+    }
+
+    public int getNumberOfBranches() {
+        if ( isEmpty() ) {
+            return 0;
+        }
+        int c = 0;
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); iter.next() ) {
+            ++c;
+        }
+        if ( !isRooted() ) {
+            --c;
+        }
+        return c;
+    }
+
+    /**
+     * Returns the number of external Nodes of this Phylogeny (int).
+     */
+    public int getNumberOfExternalNodes() {
+        if ( isEmpty() ) {
+            return 0;
+        }
+        return getExternalNodes().size();
+    }
+
+    /**
+     * Returns all paralogs of the external PhylogenyNode n of this Phylogeny.
+     * Paralogs are returned as a List of node references.
+     * <p>
+     * PRECONDITION: This tree must be binary and rooted, and speciation/duplication
+     * assignments need to be present for each of its internal Nodes.
+     * <p>
+     * Returns null if this Phylogeny is empty or if n is internal.
+     * <p>
+     * (Last modified: 11/22/00) Olivier CHABROL :
+     * olivier.chabrol@univ-provence.fr
+     * 
+     * @param n
+     *            external PhylogenyNode whose paralogs are to be returned
+     * @return List of references to all paralogous Nodes of PhylogenyNode n
+     *         of this Phylogeny, null if this Phylogeny is empty or if n is
+     *         internal
+     */
+    public List<PhylogenyNode> getParalogousNodes( final PhylogenyNode n, final String[] taxonomyCodeRange ) {
+        PhylogenyNode node = n;
+        PhylogenyNode prev = null;
+        final List<PhylogenyNode> v = new ArrayList<PhylogenyNode>();
+        final Map<PhylogenyNode, List<String>> map = new HashMap<PhylogenyNode, List<String>>();
+        getTaxonomyMap( getRoot(), map );
+        if ( !node.isExternal() || isEmpty() ) {
+            return null;
+        }
+        final String searchNodeSpeciesId = PhylogenyMethods.getTaxonomyIdentifier( n );
+        List<String> taxIdList = null;
+        final List<String> taxonomyCodeRangeList = Arrays.asList( taxonomyCodeRange );
+        while ( !node.isRoot() ) {
+            prev = node;
+            node = node.getParent();
+            taxIdList = map.get( node );
+            if ( node.isDuplication() && isContains( taxIdList, taxonomyCodeRangeList ) ) {
+                if ( node.getChildNode1() == prev ) {
+                    v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode2()
+                            .getAllExternalDescendants() ) );
+                }
+                else {
+                    v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode1()
+                            .getAllExternalDescendants() ) );
+                }
+            }
+        }
+        return v;
+    }
+
+    public Collection<SequenceRelation.SEQUENCE_RELATION_TYPE> getRelevantSequenceRelationTypes() {
+        if ( _relevant_sequence_relation_types == null ) {
+            _relevant_sequence_relation_types = new Vector<SEQUENCE_RELATION_TYPE>();
+        }
+        return _relevant_sequence_relation_types;
+    }
+
+    /**
+     * Returns the root PhylogenyNode of this Phylogeny.
+     */
+    public PhylogenyNode getRoot() {
+        return _root;
+    }
+
+    public Collection<Sequence> getSequenceRelationQueries() {
+        return _sequenceRelationQueries;
+    }
+
+    /**
+     * Lists all species contained in the leaves under a node. Olivier CHABROL :
+     * olivier.chabrol@univ-provence.fr
+     * 
+     * @param node
+     *            PhylogenyNode whose subtree species are returned
+     * @return species contained in all leaves under the given node
+     */
+    private List<String> getSubNodeTaxonomy( final PhylogenyNode node ) {
+        final List<String> taxonomyList = new ArrayList<String>();
+        final List<PhylogenyNode> childs = node.getAllExternalDescendants();
+        String speciesId = null;
+        for( final PhylogenyNode phylogenyNode : childs ) {
+            // taxId = new Long(phylogenyNode.getTaxonomyID());
+            speciesId = PhylogenyMethods.getTaxonomyIdentifier( phylogenyNode );
+            if ( !taxonomyList.contains( speciesId ) ) {
+                taxonomyList.add( speciesId );
+            }
+        }
+        return taxonomyList;
+    }
+
+    /**
+     * Creates a map [PhylogenyNode, List<String>]; each list contains the
+     * species found in all leaves under the corresponding node. Olivier CHABROL :
+     * olivier.chabrol@univ-provence.fr
+     * 
+     * @param node
+     *            the tree root node
+     * @param map
+     *            map to fill
+     */
+    private void getTaxonomyMap( final PhylogenyNode node, final Map<PhylogenyNode, List<String>> map ) {
+        // node is leaf
+        if ( node.isExternal() ) {
+            return;
+        }
+        map.put( node, getSubNodeTaxonomy( node ) );
+        getTaxonomyMap( node.getChildNode1(), map );
+        getTaxonomyMap( node.getChildNode2(), map );
+    }
+
+    public String getType() {
+        return _type;
+    }
+
+    /**
+     * Hashes the ID number of each PhylogenyNode of this Phylogeny to its
+     * corresponding PhylogenyNode, in order to make method getNode( id ) run in
+     * constant time. Important: The user is responsible for calling this method
+     * (again) after this Phylogeny has been changed/created/renumbered.
+     */
+    public void hashIDs() {
+        if ( isEmpty() ) {
+            return;
+        }
+        setIdHash( new HashMap<Integer, PhylogenyNode>() );
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            getIdHash().put( node.getId(), node );
+        }
+    }
+
+    /**
+     * Resets this Phylogeny to an empty state.
+     */
+    public void init() {
+        _root = null;
+        _rooted = false;
+        _name = "";
+        _description = "";
+        _type = "";
+        _distance_unit = "";
+        _idhash = null;
+        _confidence = null;
+        _identifier = null;
+        _rerootable = true;
+        setAllowMultipleParents( Phylogeny.ALLOW_MULTIPLE_PARENTS_DEFAULT );
+    }
+
+    private boolean isAllowMultipleParents() {
+        return _allow_multiple_parents;
+    }
+
+    /**
+     * Returns whether this is a completely binary tree (i.e. all internal nodes
+     * are bifurcations).
+     * 
+     */
+    public boolean isCompletelyBinary() {
+        if ( isEmpty() ) {
+            return false;
+        }
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( node.isInternal() && ( node.getNumberOfDescendants() != 2 ) ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Utility method to check whether all elements of a list are contained in the
+     * rangeList. Olivier CHABROL : olivier.chabrol@univ-provence.fr
+     * 
+     * @param list
+     *            list to be checked
+     * @param rangeList
+     *            the range list to compare against
+     * @return <code>true</code> if all elements of list are contained in
+     *         rangeList, <code>false</code> otherwise.
+     */
+    private boolean isContains( final List<String> list, final List<String> rangeList ) {
+        if ( list.size() > rangeList.size() ) {
+            return false;
+        }
+        String l = null;
+        for( final Iterator<String> iterator = list.iterator(); iterator.hasNext(); ) {
+            l = iterator.next();
+            if ( !rangeList.contains( l ) ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Checks whether a Phylogeny object is deleted (or empty).
+     * 
+     * @return true if the tree is deleted (or empty), false otherwise
+     */
+    public boolean isEmpty() {
+        return ( getRoot() == null );
+    }
+
+    public boolean isRerootable() {
+        return _rerootable;
+    }
+
+    /**
+     * Returns true if this Phylogeny is rooted.
+     */
+    public boolean isRooted() {
+        return _rooted;
+    } // isRooted()
+
+    public boolean isTree() {
+        return true;
+    }
+
+    public PhylogenyNodeIterator iteratorExternalForward() {
+        return new ExternalForwardIterator( this );
+    }
+
+    public PhylogenyNodeIterator iteratorLevelOrder() {
+        return new LevelOrderTreeIterator( this );
+    }
+
+    public PhylogenyNodeIterator iteratorPostorder() {
+        return new PostorderTreeIterator( this );
+    }
+
+    public PhylogenyNodeIterator iteratorPreorder() {
+        return new PreorderTreeIterator( this );
+    }
+
+    /**
+     * Resets the ID numbers of the nodes of this Phylogeny in level order. <br>
+     * WARNING: after this method has been called, node IDs are no longer
+     * unique.
+     */
+    public void levelOrderReID() {
+        if ( isEmpty() ) {
+            return;
+        }
+        _idhash = null;
+        int max = 0;
+        for( final PhylogenyNodeIterator it = iteratorPreorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            if ( node.isRoot() ) {
+                node.setId( PhylogenyNode.getNodeCount() );
+            }
+            else {
+                node.setId( node.getParent().getId() + 1 );
+                if ( node.getId() > max ) {
+                    max = node.getId();
+                }
+            }
+        }
+        PhylogenyNode.setNodeCount( max + 1 );
+    }
+
+    /**
+     * Arranges the order of children for each node of this Phylogeny in such a
+     * way that either the branch with more children is on top (right) or on
+     * bottom (left), depending on the value of boolean order.
+     * 
+     * @param order
+     *            decides in which direction to order
+     */
+    public void orderAppearance( final boolean order ) throws RuntimeException {
+        if ( !isTree() ) {
+            throw new FailedConditionCheckException( "Attempt to order appearance on phylogeny which is not tree-like." );
+        }
+        if ( isEmpty() ) {
+            return;
+        }
+        orderAppearanceHelper( getRoot(), order );
+    }
+
+    // Helper method for "orderAppearance(boolean)".
+    // Traverses this Phylogeny recursively.
+    private void orderAppearanceHelper( final PhylogenyNode n, final boolean order ) {
+        if ( n.isExternal() ) {
+            return;
+        }
+        else {
+            PhylogenyNode temp = null;
+            // FIXME
+            if ( ( n.getNumberOfDescendants() == 2 )
+                    && ( n.getChildNode1().getNumberOfExternalNodes() != n.getChildNode2().getNumberOfExternalNodes() )
+                    && ( ( n.getChildNode1().getNumberOfExternalNodes() < n.getChildNode2().getNumberOfExternalNodes() ) == order ) ) {
+                temp = n.getChildNode1();
+                n.setChild1( n.getChildNode2() );
+                n.setChild2( temp );
+            }
+            for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
+                orderAppearanceHelper( n.getChildNode( i ), order );
+            }
+        }
+    }
+
+    public void preOrderReId() {
+        if ( isEmpty() ) {
+            return;
+        }
+        setIdHash( null );
+        int i = PhylogenyNode.getNodeCount();
+        for( final PhylogenyNodeIterator it = iteratorPreorder(); it.hasNext(); ) {
+            it.next().setId( i++ );
+        }
+        PhylogenyNode.setNodeCount( i );
+    }
+
+    /**
+     * Prints descriptions of all external Nodes of this Phylogeny to
+     * System.out.
+     */
+    public void printExtNodes() {
+        if ( isEmpty() ) {
+            return;
+        }
+        for( final PhylogenyNodeIterator iter = iteratorExternalForward(); iter.hasNext(); ) {
+            System.out.println( iter.next() + "\n" );
+        }
+    }
+
+    /**
+     * (Re)counts the number of external descendants for each PhylogenyNode of
+     * this Phylogeny. As an example, this method needs to be called after a
+     * Phylogeny has been reRooted and it is to be displayed.
+     * 
+     * @param consider_collapsed_nodes
+     *            set to true to take collapsed nodes into account (a collapsed
+     *            node counts as one external node).
+     */
+    public void recalculateNumberOfExternalDescendants( final boolean consider_collapsed_nodes ) {
+        if ( isEmpty() ) {
+            return;
+        }
+        for( final PhylogenyNodeIterator iter = iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( node.isExternal() || ( consider_collapsed_nodes && node.isCollapse() ) ) {
+                node.setSumExtNodes( 1 );
+            }
+            else {
+                int sum = 0;
+                for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
+                    sum += node.getChildNode( i ).getNumberOfExternalNodes();
+                }
+                node.setSumExtNodes( sum );
+            }
+        }
+    }
+
+    /**
+     * Places the root of this Phylogeny on the parent branch of the
+     * PhylogenyNode with a corresponding ID. The new root is always placed on
+     * the middle of the branch. If the resulting reRooted Phylogeny is to be
+     * used any further, in most cases the following methods have to be called
+     * on the resulting Phylogeny:
+     * <ul>
+     * <li>recalculateNumberOfExternalDescendants(boolean)
+     * <li>recalculateAndReset()
+     * </ul>
+     * 
+     * @param id
+     *            ID (int) of PhylogenyNode of this Phylogeny
+     */
+    public void reRoot( final int id ) {
+        reRoot( getNode( id ) );
+    }
+
+    /**
+     * Places the root of this Phylogeny on Branch b. The new root is always
+     * placed on the middle of the branch b.
+     * 
+     */
+    public void reRoot( final PhylogenyBranch b ) {
+        final PhylogenyNode n1 = b.getFirstNode();
+        final PhylogenyNode n2 = b.getSecondNode();
+        if ( n1.isExternal() ) {
+            reRoot( n1 );
+        }
+        else if ( n2.isExternal() ) {
+            reRoot( n2 );
+        }
+        else if ( ( n2 == n1.getChildNode1() ) || ( n2 == n1.getChildNode2() ) ) {
+            reRoot( n2 );
+        }
+        else if ( ( n1 == n2.getChildNode1() ) || ( n1 == n2.getChildNode2() ) ) {
+            reRoot( n1 );
+        }
+        else if ( ( n1.getParent() != null ) && n1.getParent().isRoot()
+                && ( ( n1.getParent().getChildNode1() == n2 ) || ( n1.getParent().getChildNode2() == n2 ) ) ) {
+            reRoot( n1 );
+        }
+        else {
+            throw new IllegalArgumentException( "reRoot( Branch b ): b is not a branch." );
+        }
+    }
+
+    /**
+     * Places the root of this Phylogeny on the parent branch PhylogenyNode n.
+     * The new root is always placed on the middle of the branch.
+     * <p>
+     * If the resulting reRooted Phylogeny is to be used any further, in most
+     * cases the following three methods have to be called on the resulting
+     * Phylogeny:
+     * <ul>
+     * <li>recalculateNumberOfExternalDescendants(boolean) <li>recalculateAndReset()
+     * </ul>
+     * <p>
+     * (Last modified: 10/01/01)
+     * 
+     * @param n
+     *            PhylogenyNode of this Phylogeny
+     */
+    public void reRoot( final PhylogenyNode n ) {
+        reRoot( n, -1 );
+    }
+
+    public void reRoot( final PhylogenyNode n, final double distance_n_to_parent ) {
+        if ( isEmpty() || ( getNumberOfExternalNodes() < 2 ) ) {
+            return;
+        }
+        setRooted( true );
+        if ( n.isRoot() ) {
+            return;
+        }
+        else if ( n.getParent().isRoot() ) {
+            if ( ( n.getParent().getNumberOfDescendants() == 2 ) && ( distance_n_to_parent >= 0 ) ) {
+                final double d = n.getParent().getChildNode1().getDistanceToParent()
+                        + n.getParent().getChildNode2().getDistanceToParent();
+                PhylogenyNode other;
+                if ( n.getChildNodeIndex() == 0 ) {
+                    other = n.getParent().getChildNode2();
+                }
+                else {
+                    other = n.getParent().getChildNode1();
+                }
+                n.setDistanceToParent( distance_n_to_parent );
+                final double dm = d - distance_n_to_parent;
+                if ( dm >= 0 ) {
+                    other.setDistanceToParent( dm );
+                }
+                else {
+                    other.setDistanceToParent( 0 );
+                }
+            }
+            if ( n.getParent().getNumberOfDescendants() > 2 ) {
+                final int index = n.getChildNodeIndex();
+                final double dn = n.getDistanceToParent();
+                final PhylogenyNode prev_root = getRoot();
+                prev_root.getDescendants().remove( index );
+                final PhylogenyNode new_root = new PhylogenyNode();
+                new_root.setChildNode( 0, n );
+                new_root.setChildNode( 1, prev_root );
+                if ( n.getBranchDataDirectly() != null ) {
+                    prev_root.setBranchData( ( BranchData ) n.getBranchDataDirectly().copy() );
+                }
+                setRoot( new_root );
+                if ( distance_n_to_parent >= 0 ) {
+                    n.setDistanceToParent( distance_n_to_parent );
+                    final double d = dn - distance_n_to_parent;
+                    if ( d >= 0 ) {
+                        prev_root.setDistanceToParent( d );
+                    }
+                    else {
+                        prev_root.setDistanceToParent( 0 );
+                    }
+                }
+                else {
+                    if ( dn >= 0 ) {
+                        final double d = dn / 2.0;
+                        n.setDistanceToParent( d );
+                        prev_root.setDistanceToParent( d );
+                    }
+                }
+            }
+        }
+        else {
+            PhylogenyNode a = n;
+            PhylogenyNode b = null;
+            PhylogenyNode c = null;
+            final PhylogenyNode new_root = new PhylogenyNode();
+            double distance1 = 0.0;
+            double distance2 = 0.0;
+            BranchData branch_data_1 = null;
+            BranchData branch_data_2 = null;
+            b = a.getParent();
+            c = b.getParent();
+            new_root.setChildNode( 0, a );
+            new_root.setChildNode( 1, b );
+            distance1 = c.getDistanceToParent();
+            if ( c.getBranchDataDirectly() != null ) {
+                branch_data_1 = ( BranchData ) c.getBranchDataDirectly().copy();
+            }
+            c.setDistanceToParent( b.getDistanceToParent() );
+            if ( b.getBranchDataDirectly() != null ) {
+                c.setBranchData( ( BranchData ) b.getBranchDataDirectly().copy() );
+            }
+            if ( a.getBranchDataDirectly() != null ) {
+                b.setBranchData( ( BranchData ) a.getBranchDataDirectly().copy() );
+            }
+            // New root is always placed in the middle of the branch:
+            if ( a.getDistanceToParent() == PhylogenyNode.DISTANCE_DEFAULT ) {
+                b.setDistanceToParent( PhylogenyNode.DISTANCE_DEFAULT );
+            }
+            else {
+                if ( distance_n_to_parent >= 0.0 ) {
+                    final double diff = a.getDistanceToParent() - distance_n_to_parent;
+                    a.setDistanceToParent( distance_n_to_parent );
+                    b.setDistanceToParent( diff >= 0.0 ? diff : 0.0 );
+                }
+                else {
+                    final double d = a.getDistanceToParent() / 2.0;
+                    a.setDistanceToParent( d );
+                    b.setDistanceToParent( d );
+                }
+            }
+            b.setChildNodeOnly( a.getChildNodeIndex( b ), c );
+            // moving to the old root, swapping references:
+            while ( !c.isRoot() ) {
+                a = b;
+                b = c;
+                c = c.getParent();
+                b.setChildNodeOnly( a.getChildNodeIndex( b ), c );
+                b.setParent( a );
+                distance2 = c.getDistanceToParent();
+                branch_data_2 = c.getBranchDataDirectly();
+                c.setDistanceToParent( distance1 );
+                c.setBranchData( branch_data_1 );
+                distance1 = distance2;
+                branch_data_1 = branch_data_2;
+            }
+            // removing the old root:
+            if ( c.getNumberOfDescendants() == 2 ) {
+                final PhylogenyNode node = c.getChildNode( 1 - b.getChildNodeIndex( c ) );
+                node.setParent( b );
+                if ( ( c.getDistanceToParent() == PhylogenyNode.DISTANCE_DEFAULT )
+                        && ( node.getDistanceToParent() == PhylogenyNode.DISTANCE_DEFAULT ) ) {
+                    node.setDistanceToParent( PhylogenyNode.DISTANCE_DEFAULT );
+                }
+                else {
+                    node.setDistanceToParent( ( c.getDistanceToParent() >= 0.0 ? c.getDistanceToParent() : 0.0 )
+                            + ( node.getDistanceToParent() >= 0.0 ? node.getDistanceToParent() : 0.0 ) );
+                }
+                if ( c.getBranchDataDirectly() != null ) {
+                    node.setBranchData( ( BranchData ) c.getBranchDataDirectly().copy() );
+                }
+                for( int i = 0; i < b.getNumberOfDescendants(); ++i ) {
+                    if ( b.getChildNode( i ) == c ) {
+                        b.setChildNodeOnly( i, node );
+                        break;
+                    }
+                }
+            }
+            else {
+                c.setParent( b );
+                c.removeChildNode( b.getChildNodeIndex( c ) );
+            }
+            setRoot( new_root );
+        }
+    }
+
+    /**
+     * Sets all Nodes of this Phylogeny to not-collapsed.
+     * <p>
+     * In most cases methods adjustNodeCount(false) and recalculateAndReset()
+     * need to be called after this method has been called.
+     */
+    public void setAllNodesToNotCollapse() {
+        if ( isEmpty() ) {
+            return;
+        }
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            node.setCollapse( false );
+        }
+    }
+
+    private void setAllowMultipleParents( final boolean allow_multiple_parents ) {
+        _allow_multiple_parents = allow_multiple_parents;
+    }
+
+    public void setConfidence( final Confidence confidence ) {
+        _confidence = confidence;
+    }
+
+    public void setDescription( final String description ) {
+        _description = description;
+    }
+
+    public void setDistanceUnit( final String distance_unit ) {
+        _distance_unit = distance_unit;
+    }
+
+    public void setIdentifier( final Identifier identifier ) {
+        _identifier = identifier;
+    }
+
+    void setIdHash( final HashMap<Integer, PhylogenyNode> idhash ) {
+        _idhash = idhash;
+    }
+
+    /**
+     * Sets the indicators of all Nodes of this Phylogeny to 0.
+     */
+    public void setIndicatorsToZero() {
+        if ( isEmpty() ) {
+            return;
+        }
+        for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
+            iter.next().setIndicator( ( byte ) 0 );
+        }
+    } // setIndicatorsToZero()
+
+    /**
+     * Sets the name of this Phylogeny to s.
+     */
+    public void setName( final String s ) {
+        _name = s;
+    }
+
+    public void setRelevantSequenceRelationTypes( final Collection<SequenceRelation.SEQUENCE_RELATION_TYPE> types ) {
+        _relevant_sequence_relation_types = types;
+    }
+
+    public void setRerootable( final boolean rerootable ) {
+        _rerootable = rerootable;
+    }
+
+    public void setRoot( final PhylogenyNode n ) {
+        _root = n;
+    } // setRoot( PhylogenyNode )
+
+    /**
+     * Sets whether this Phylogeny is rooted or not.
+     */
+    public void setRooted( final boolean b ) {
+        _rooted = b;
+    } // setRooted( boolean )
+
+    public void setSequenceRelationQueries( final Collection<Sequence> sequencesByName ) {
+        _sequenceRelationQueries = sequencesByName;
+    }
+
+    public void setType( final String type ) {
+        _type = type;
+    }
+
+    /**
+     * Swaps the two children of a PhylogenyNode node of this Phylogeny.
+     * <p>
+     * (Last modified: 06/13/01)
+     * 
+     * @param node
+     *            a PhylogenyNode of this Phylogeny
+     */
+    public void swapChildren( final PhylogenyNode node ) throws RuntimeException {
+        if ( !isTree() ) {
+            throw new FailedConditionCheckException( "Attempt to swap children on phylogeny which is not tree-like." );
+        }
+        if ( isEmpty() || node.isExternal() || ( node.getNumberOfDescendants() < 2 ) ) {
+            return;
+        }
+        final PhylogenyNode first = node.getFirstChildNode();
+        for( int i = 1; i < node.getNumberOfDescendants(); ++i ) {
+            node.setChildNode( i - 1, node.getChildNode( i ) );
+        }
+        node.setChildNode( node.getNumberOfDescendants() - 1, first );
+    } // swapChildren( PhylogenyNode )
+
+    public String toNewHampshire() {
+        return toNewHampshire( false );
+    }
+
+    public String toNewHampshire( final boolean simple_nh ) {
+        try {
+            return new PhylogenyWriter().toNewHampshire( this, simple_nh, true ).toString();
+        }
+        catch ( final IOException e ) {
+            throw new Error( "this should not have happend: " + e.getMessage() );
+        }
+    }
+
+    public String toNewHampshireX() {
+        try {
+            return new PhylogenyWriter().toNewHampshireX( this ).toString();
+        }
+        catch ( final IOException e ) {
+            throw new Error( "this should not have happend: " + e.getMessage() );
+        }
+    }
+
+    public String toNexus() {
+        try {
+            return new PhylogenyWriter().toNexus( this ).toString();
+        }
+        catch ( final IOException e ) {
+            throw new Error( "this should not have happend: " + e.getMessage() );
+        }
+    }
+
+    public String toPhyloXML( final int phyloxml_level ) {
+        try {
+            return new PhylogenyWriter().toPhyloXML( this, phyloxml_level ).toString();
+        }
+        catch ( final IOException e ) {
+            throw new Error( "this should not have happend: " + e.getMessage() );
+        }
+    }
+
+    // ---------------------------------------------------------
+    // Writing of Phylogeny to Strings
+    // ---------------------------------------------------------
+    /**
+     * Converts this Phylogeny to a New Hampshire X (String) representation.
+     * 
+     * @return New Hampshire X (String) representation of this
+     * @see #toNewHampshireX()
+     */
+    @Override
+    public String toString() {
+        return toNewHampshireX();
+    }
+
+    /**
+     * Unroots this Phylogeny (sets it to not rooted; no nodes are removed).
+     */
+    public void unRoot() throws RuntimeException {
+        if ( !isTree() ) {
+            throw new FailedConditionCheckException( "Attempt to unroot a phylogeny which is not tree-like." );
+        }
+        if ( isEmpty() ) {
+            return;
+        }
+        setIndicatorsToZero();
+        if ( !isRooted() || ( getNumberOfExternalNodes() <= 1 ) ) {
+            return;
+        }
+        setRooted( false );
+        return;
+    } // unRoot()
+}
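Note: a short usage sketch for the Phylogeny class above, for illustration only. It assumes the NHXParser added in this commit and a parser-based phylogeny factory, written here as ParserBasedPhylogenyFactory (the test code earlier in this commit accesses such a factory only through a 'factory' variable), plus node names that are unique within the tree.

    // Parse a Newick string, re-root on a named external node, and write the tree back out.
    final Phylogeny t = ParserBasedPhylogenyFactory.getInstance()
            .create( "((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6)", new NHXParser() )[ 0 ];
    t.reRoot( t.getNode( "C" ) );                      // new root is placed in the middle of C's parent branch
    t.recalculateNumberOfExternalDescendants( true );  // see the reRoot javadoc above
    for( final PhylogenyNodeIterator it = t.iteratorExternalForward(); it.hasNext(); ) {
        System.out.println( it.next().getName() );
    }
    System.out.println( t.toNewHampshire() );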
diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyBranch.java b/forester/java/src/org/forester/phylogeny/PhylogenyBranch.java
new file mode 100644 (file)
index 0000000..214337f
--- /dev/null
@@ -0,0 +1,168 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny;
+
+import org.forester.phylogeny.data.PhylogenyData;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public class PhylogenyBranch implements Edge {
+
+    private final PhylogenyNode _node_1;
+    private final PhylogenyNode _node_2;
+    private PhylogenyData       _data;
+    private final boolean       _is_directed;
+    private boolean             _towards_1;
+
+    public PhylogenyBranch( final PhylogenyNode first_node, final PhylogenyNode second_node ) {
+        if ( ( first_node == null ) || ( second_node == null ) ) {
+            throw new IllegalArgumentException( "Attempt to create a branch with a null node" );
+        }
+        _node_1 = first_node;
+        _node_2 = second_node;
+        _is_directed = false;
+    }
+
+    public PhylogenyBranch( final PhylogenyNode first_node,
+                            final PhylogenyNode second_node,
+                            final boolean direction_towards_first ) {
+        if ( ( first_node == null ) || ( second_node == null ) ) {
+            throw new IllegalArgumentException( "Attempt to create a branch with a null node" );
+        }
+        _node_1 = first_node;
+        _node_2 = second_node;
+        _is_directed = true;
+        _towards_1 = direction_towards_first;
+    }
+
+    @Override
+    public boolean equals( final Object obj ) {
+        if ( this == obj ) {
+            return true;
+        }
+        if ( obj == null ) {
+            return false;
+        }
+        if ( getClass() != obj.getClass() ) {
+            return false;
+        }
+        final PhylogenyBranch other = ( PhylogenyBranch ) obj;
+        return hashCode() == other.hashCode();
+    }
+
+    public PhylogenyNode getConnectedNode( final PhylogenyNode node ) throws IllegalArgumentException {
+        if ( node == _node_1 ) {
+            return _node_2;
+        }
+        else if ( node == _node_2 ) {
+            return _node_1;
+        }
+        else {
+            throw new IllegalArgumentException( "Attempt to get " + "connected node on branch with node which is "
+                    + "not connected by the branch" );
+        }
+    }
+
+    public PhylogenyData getData() {
+        return _data;
+    }
+
+    public PhylogenyNode getFirstNode() {
+        return _node_1;
+    }
+
+    public PhylogenyNode getSecondNode() {
+        return _node_2;
+    }
+
+    @Override
+    public int hashCode() {
+        final int PRIME = 31;
+        int result = 1;
+        final int node_1_hc = _node_1.hashCode();
+        final int node_2_hc = _node_2.hashCode();
+        int hc_1 = 0;
+        int hc_2 = 0;
+        if ( !_is_directed ) {
+            if ( node_1_hc > node_2_hc ) {
+                hc_1 = node_2_hc;
+                hc_2 = node_1_hc;
+            }
+            else {
+                hc_1 = node_1_hc;
+                hc_2 = node_2_hc;
+            }
+        }
+        else {
+            if ( _towards_1 ) {
+                hc_1 = node_2_hc;
+                hc_2 = node_1_hc;
+            }
+            else {
+                hc_1 = node_1_hc;
+                hc_2 = node_2_hc;
+            }
+        }
+        result = PRIME * result + ( ( _data == null ) ? 0 : _data.hashCode() );
+        result = PRIME * result + ( _is_directed ? 1231 : 1237 );
+        result = PRIME * result + hc_1;
+        result = PRIME * result + hc_2;
+        return result;
+    }
+
+    public boolean isDirected() {
+        return _is_directed;
+    }
+
+    public boolean isDirectionTowards( final PhylogenyNode node ) throws RuntimeException {
+        if ( !isDirected() ) {
+            throw new RuntimeException( "Attempt to get direction of undirected branch" );
+        }
+        return ( ( node == _node_1 ) && _towards_1 );
+    }
+
+    public void setDirectionTowards( final PhylogenyNode node ) {
+        _towards_1 = node == _node_1;
+    }
+
+    @Override
+    public String toString() {
+        if ( isDirected() ) {
+            if ( isDirectionTowards( getFirstNode() ) ) {
+                return ( getSecondNode().getName() + " -> " + getFirstNode().getName() );
+            }
+            else {
+                return ( getFirstNode().getName() + " -> " + getSecondNode().getName() );
+            }
+        }
+        else {
+            return ( getFirstNode().getName() + " -- " + getSecondNode().getName() );
+        }
+    }
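+
+    // Illustrative usage sketch ("a" and "b" stand for PhylogenyNode instances
+    // obtained elsewhere, e.g. from a parsed phylogeny):
+    //
+    //   final PhylogenyBranch undirected = new PhylogenyBranch( a, b );
+    //   final PhylogenyBranch directed = new PhylogenyBranch( a, b, true );
+    //
+    // toString() renders undirected branches as "<name> -- <name>" and directed
+    // branches as "<name> -> <name>", pointing at the node the direction goes towards.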
+}
diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
new file mode 100644 (file)
index 0000000..6569c81
--- /dev/null
@@ -0,0 +1,1186 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny;
+
+import java.awt.Color;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.phylogeny.data.BranchColor;
+import org.forester.phylogeny.data.BranchWidth;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.DomainArchitecture;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.FailedConditionCheckException;
+import org.forester.util.ForesterUtil;
+
+public class PhylogenyMethods {
+
+    private static PhylogenyMethods _instance      = null;
+    private final Set<Integer>      _temp_hash_set = new HashSet<Integer>();
+    private PhylogenyNode           _farthest_1    = null;
+    private PhylogenyNode           _farthest_2    = null;
+
+    private PhylogenyMethods() {
+        // Hidden constructor.
+    }
+
+    /**
+     * Calculates the distance between PhylogenyNodes node1 and node2.
+     * 
+     * 
+     * @param node1
+     * @param node2
+     * @return distance between node1 and node2
+     */
+    public double calculateDistance( final PhylogenyNode node1, final PhylogenyNode node2 ) {
+        final PhylogenyNode lca = obtainLCA( node1, node2 );
+        return ( PhylogenyMethods.getDistance( node1, lca ) + PhylogenyMethods.getDistance( node2, lca ) );
+    }
+
+    public double calculateFurthestDistance( final Phylogeny phylogeny ) {
+        if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
+            return 0.0;
+        }
+        _farthest_1 = null;
+        _farthest_2 = null;
+        PhylogenyNode node_1 = null;
+        PhylogenyNode node_2 = null;
+        double farthest_d = -Double.MAX_VALUE;
+        final PhylogenyMethods methods = PhylogenyMethods.getInstance();
+        final List<PhylogenyNode> ext_nodes = phylogeny.getRoot().getAllExternalDescendants();
+        for( int i = 1; i < ext_nodes.size(); ++i ) {
+            for( int j = 0; j < i; ++j ) {
+                final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) );
+                if ( d < 0.0 ) {
+                    throw new RuntimeException( "distance cannot be negative" );
+                }
+                if ( d > farthest_d ) {
+                    farthest_d = d;
+                    node_1 = ext_nodes.get( i );
+                    node_2 = ext_nodes.get( j );
+                }
+            }
+        }
+        _farthest_1 = node_1;
+        _farthest_2 = node_2;
+        return farthest_d;
+    }
+
+    @Override
+    public Object clone() throws CloneNotSupportedException {
+        throw new CloneNotSupportedException();
+    }
+
+    public PhylogenyNode getFarthestNode1() {
+        return _farthest_1;
+    }
+
+    public PhylogenyNode getFarthestNode2() {
+        return _farthest_2;
+    }
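+
+    // Illustrative usage sketch for the farthest-pair bookkeeping above
+    // (assumes a Phylogeny "phy" with branch lengths, obtained elsewhere):
+    //
+    //   final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+    //   final double max_dist = pm.calculateFurthestDistance( phy );
+    //   final PhylogenyNode tip_1 = pm.getFarthestNode1();
+    //   final PhylogenyNode tip_2 = pm.getFarthestNode2();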
+
+    /**
+     * Returns the LCA of PhylogenyNodes node1 and node2.
+     * 
+     * 
+     * @param node1
+     * @param node2
+     * @return LCA of node1 and node2
+     */
+    public PhylogenyNode obtainLCA( final PhylogenyNode node1, final PhylogenyNode node2 ) {
+        _temp_hash_set.clear();
+        PhylogenyNode n1 = node1;
+        PhylogenyNode n2 = node2;
+        _temp_hash_set.add( n1.getId() );
+        while ( !n1.isRoot() ) {
+            n1 = n1.getParent();
+            _temp_hash_set.add( n1.getId() );
+        }
+        while ( !_temp_hash_set.contains( n2.getId() ) && !n2.isRoot() ) {
+            n2 = n2.getParent();
+        }
+        if ( !_temp_hash_set.contains( n2.getId() ) ) {
+            throw new IllegalArgumentException( "attempt to get LCA of two nodes which do not share a common root" );
+        }
+        return n2;
+    }
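+
+    // Illustrative usage sketch (assumes two PhylogenyNode references "a" and "b"
+    // from the same rooted Phylogeny):
+    //
+    //   final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+    //   final PhylogenyNode lca = pm.obtainLCA( a, b );
+    //   final double patristic_distance = pm.calculateDistance( a, b );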
+
+    /**
+     * Returns all orthologs of the external PhylogenyNode node of Phylogeny phy.
+     * Orthologs are returned as a List of node references.
+     * <p>
+     * PRECONDITION: The tree must be binary and rooted, and speciation -
+     * duplication need to be assigned for each of its internal Nodes.
+     * 
+     * @param phy
+     *            the Phylogeny to search
+     * @param node
+     *            external PhylogenyNode whose orthologs are to be returned
+     * @return List of references to all orthologous nodes of node in phy
+     */
+    public List<PhylogenyNode> getOrthologousNodes( final Phylogeny phy, final PhylogenyNode node ) {
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        final PhylogenyNodeIterator it = phy.iteratorExternalForward();
+        while ( it.hasNext() ) {
+            final PhylogenyNode temp_node = it.next();
+            if ( ( temp_node != node ) && isAreOrthologous( node, temp_node ) ) {
+                nodes.add( temp_node );
+            }
+        }
+        return nodes;
+    }
+
+    public boolean isAreOrthologous( final PhylogenyNode node1, final PhylogenyNode node2 ) {
+        return !obtainLCA( node1, node2 ).isDuplication();
+    }
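+
+    // Illustrative usage sketch (assumes a duplication-annotated gene tree
+    // "gene_tree" and one of its external nodes "query_node"):
+    //
+    //   final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+    //   final List<PhylogenyNode> orthologs = pm.getOrthologousNodes( gene_tree, query_node );
+    //   if ( !orthologs.isEmpty() ) {
+    //       final boolean ortho = pm.isAreOrthologous( query_node, orthologs.get( 0 ) );
+    //   }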
+
+    static double addPhylogenyDistances( final double a, final double b ) {
+        if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) {
+            return a + b;
+        }
+        else if ( a >= 0.0 ) {
+            return a;
+        }
+        else if ( b >= 0.0 ) {
+            return b;
+        }
+        return PhylogenyNode.DISTANCE_DEFAULT;
+    }
+
+    // Helper for getUltraParalogousNodes( PhylogenyNode ).
+    public static boolean areAllChildrenDuplications( final PhylogenyNode n ) {
+        if ( n.isExternal() ) {
+            return false;
+        }
+        else {
+            if ( n.isDuplication() ) {
+                //FIXME test me!
+                for( final PhylogenyNode desc : n.getDescendants() ) {
+                    if ( !areAllChildrenDuplications( desc ) ) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+            else {
+                return false;
+            }
+        }
+    }
+
+    public static int calculateDepth( final PhylogenyNode node ) {
+        PhylogenyNode n = node;
+        int steps = 0;
+        while ( !n.isRoot() ) {
+            steps++;
+            n = n.getParent();
+        }
+        return steps;
+    }
+
+    public static double calculateDistanceToRoot( final PhylogenyNode node ) {
+        PhylogenyNode n = node;
+        double d = 0.0;
+        while ( !n.isRoot() ) {
+            if ( n.getDistanceToParent() > 0.0 ) {
+                d += n.getDistanceToParent();
+            }
+            n = n.getParent();
+        }
+        return d;
+    }
+
+    public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) {
+        if ( node.isExternal() ) {
+            return 0;
+        }
+        short max = 0;
+        for( PhylogenyNode d : node.getAllExternalDescendants() ) {
+            short steps = 0;
+            while ( d != node ) {
+                if ( d.isCollapse() ) {
+                    steps = 0;
+                }
+                else {
+                    steps++;
+                }
+                d = d.getParent();
+            }
+            if ( max < steps ) {
+                max = steps;
+            }
+        }
+        return max;
+    }
+
+    public static int calculateMaxDepth( final Phylogeny phy ) {
+        int max = 0;
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final int steps = calculateDepth( node );
+            if ( steps > max ) {
+                max = steps;
+            }
+        }
+        return max;
+    }
+
+    public static double calculateMaxDistanceToRoot( final Phylogeny phy ) {
+        double max = 0.0;
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final double d = calculateDistanceToRoot( node );
+            if ( d > max ) {
+                max = d;
+            }
+        }
+        return max;
+    }
+
+    public static int calculateMaximumNumberOfDescendantsPerNode( final Phylogeny phy ) {
+        int max = 0;
+        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( node.getNumberOfDescendants() > max ) {
+                max = node.getNumberOfDescendants();
+            }
+        }
+        return max;
+    }
+
+    /**
+     * Returns the set of distinct taxonomies of
+     * all external nodes of node.
+     * If at least one of the external nodes has no taxonomy,
+     * null is returned.
+     * 
+     */
+    public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
+        final List<PhylogenyNode> descs = node.getAllExternalDescendants();
+        final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
+        for( final PhylogenyNode n : descs ) {
+            if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
+                return null;
+            }
+            tax_set.add( n.getNodeData().getTaxonomy() );
+        }
+        return tax_set;
+    }
+
+    /**
+     * Returns a map of distinct taxonomies of
+     * all external nodes of node.
+     * If at least one of the external nodes has no taxonomy,
+     * null is returned.
+     * 
+     */
+    public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
+        final List<PhylogenyNode> descs = node.getAllExternalDescendants();
+        final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
+        for( final PhylogenyNode n : descs ) {
+            if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
+                return null;
+            }
+            final Taxonomy t = n.getNodeData().getTaxonomy();
+            if ( tax_map.containsKey( t ) ) {
+                tax_map.put( t, tax_map.get( t ) + 1 );
+            }
+            else {
+                tax_map.put( t, 1 );
+            }
+        }
+        return tax_map;
+    }
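+
+    // Illustrative usage sketch (assumes an internal PhylogenyNode "n" whose
+    // external descendants all carry taxonomy data):
+    //
+    //   final Set<Taxonomy> taxa = PhylogenyMethods.obtainDistinctTaxonomies( n );
+    //   final SortedMap<Taxonomy, Integer> counts = PhylogenyMethods.obtainDistinctTaxonomyCounts( n );
+    //
+    // Both return null if at least one external descendant lacks taxonomy data.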
+
+    public static int calculateNumberOfExternalNodesWithoutTaxonomy( final PhylogenyNode node ) {
+        final List<PhylogenyNode> descs = node.getAllExternalDescendants();
+        int x = 0;
+        for( final PhylogenyNode n : descs ) {
+            if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
+                x++;
+            }
+        }
+        return x;
+    }
+
+    /**
+     * Deep copies the phylogeny originating from this node.
+     */
+    static PhylogenyNode copySubTree( final PhylogenyNode source ) {
+        if ( source == null ) {
+            return null;
+        }
+        else {
+            final PhylogenyNode newnode = source.copyNodeData();
+            if ( !source.isExternal() ) {
+                for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
+                    newnode.setChildNode( i, PhylogenyMethods.copySubTree( source.getChildNode( i ) ) );
+                }
+            }
+            return newnode;
+        }
+    }
+
+    /**
+     * Shallow copies the phylogeny originating from this node.
+     */
+    static PhylogenyNode copySubTreeShallow( final PhylogenyNode source ) {
+        if ( source == null ) {
+            return null;
+        }
+        else {
+            final PhylogenyNode newnode = source.copyNodeDataShallow();
+            if ( !source.isExternal() ) {
+                for( int i = 0; i < source.getNumberOfDescendants(); ++i ) {
+                    newnode.setChildNode( i, PhylogenyMethods.copySubTreeShallow( source.getChildNode( i ) ) );
+                }
+            }
+            return newnode;
+        }
+    }
+
+    public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
+        phy.hashIDs();
+        for( final Integer id : to_delete ) {
+            phy.deleteSubtree( phy.getNode( id ), true );
+        }
+        phy.hashIDs();
+    }
+
+    public static void deleteExternalNodesNegativeSelection( final String[] node_names_to_delete, final Phylogeny p )
+            throws IllegalArgumentException {
+        for( int i = 0; i < node_names_to_delete.length; ++i ) {
+            if ( ForesterUtil.isEmpty( node_names_to_delete[ i ] ) ) {
+                continue;
+            }
+            List<PhylogenyNode> nodes = null;
+            nodes = p.getNodes( node_names_to_delete[ i ] );
+            final Iterator<PhylogenyNode> it = nodes.iterator();
+            while ( it.hasNext() ) {
+                final PhylogenyNode n = it.next();
+                if ( !n.isExternal() ) {
+                    throw new IllegalArgumentException( "attempt to delete non-external node \""
+                            + node_names_to_delete[ i ] + "\"" );
+                }
+                p.deleteSubtree( n, true );
+            }
+        }
+    }
+
+    public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
+        //   final Set<Integer> to_delete = new HashSet<Integer>();
+        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+            final PhylogenyNode n = it.next();
+            if ( n.getNodeData().isHasTaxonomy() ) {
+                if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
+                    //to_delete.add( n.getNodeId() );
+                    phy.deleteSubtree( n, true );
+                }
+            }
+            else {
+                throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
+            }
+        }
+        phy.hashIDs();
+        phy.externalNodesHaveChanged();
+        //  deleteExternalNodesNegativeSelection( to_delete, phy );
+    }
+
+    public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
+                                                                     final Phylogeny p ) {
+        final PhylogenyNodeIterator it = p.iteratorExternalForward();
+        final String[] to_delete = new String[ p.getNumberOfExternalNodes() ];
+        int i = 0;
+        Arrays.sort( node_names_to_keep );
+        while ( it.hasNext() ) {
+            final String current_name = it.next().getName();
+            if ( Arrays.binarySearch( node_names_to_keep, current_name ) < 0 ) {
+                to_delete[ i++ ] = current_name;
+            }
+        }
+        PhylogenyMethods.deleteExternalNodesNegativeSelection( to_delete, p );
+        final List<String> deleted = new ArrayList<String>();
+        for( final String n : to_delete ) {
+            if ( !ForesterUtil.isEmpty( n ) ) {
+                deleted.add( n );
+            }
+        }
+        return deleted;
+    }
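+
+    // Illustrative usage sketch (the node names are made-up examples; assumes a
+    // Phylogeny "phy" containing external nodes with these names):
+    //
+    //   final String[] keep = { "A", "B", "C" };
+    //   final List<String> removed = PhylogenyMethods.deleteExternalNodesPositiveSelection( keep, phy );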
+
+    public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
+        final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
+        final Set<Integer> encountered = new HashSet<Integer>();
+        if ( !node.isExternal() ) {
+            final List<PhylogenyNode> exts = node.getAllExternalDescendants();
+            for( PhylogenyNode current : exts ) {
+                descs.add( current );
+                while ( current != node ) {
+                    current = current.getParent();
+                    if ( encountered.contains( current.getId() ) ) {
+                        continue;
+                    }
+                    descs.add( current );
+                    encountered.add( current.getId() );
+                }
+            }
+        }
+        return descs;
+    }
+
+    /**
+     * Convenience method.
+     * 
+     * @param node
+     * @return the branch color value of node, or null if no branch color is set
+     */
+    public static Color getBranchColorValue( final PhylogenyNode node ) {
+        if ( node.getBranchData().getBranchColor() == null ) {
+            return null;
+        }
+        return node.getBranchData().getBranchColor().getValue();
+    }
+
+    /**
+     * Convenience method
+     */
+    public static double getBranchWidthValue( final PhylogenyNode node ) {
+        if ( !node.getBranchData().isHasBranchWidth() ) {
+            return BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE;
+        }
+        return node.getBranchData().getBranchWidth().getValue();
+    }
+
+    /**
+     * Convenience method
+     */
+    public static double getConfidenceValue( final PhylogenyNode node ) {
+        if ( !node.getBranchData().isHasConfidences() ) {
+            return Confidence.CONFIDENCE_DEFAULT_VALUE;
+        }
+        return node.getBranchData().getConfidence( 0 ).getValue();
+    }
+
+    /**
+     * Convenience method
+     */
+    public static double[] getConfidenceValuesAsArray( final PhylogenyNode node ) {
+        if ( !node.getBranchData().isHasConfidences() ) {
+            return new double[ 0 ];
+        }
+        final double[] values = new double[ node.getBranchData().getConfidences().size() ];
+        int i = 0;
+        for( final Confidence c : node.getBranchData().getConfidences() ) {
+            values[ i++ ] = c.getValue();
+        }
+        return values;
+    }
+
+    /**
+     * Calculates the distance between PhylogenyNodes n1 and n2.
+     * PRECONDITION: n1 is a descendant of n2.
+     * 
+     * @param n1
+     *            a descendant of n2
+     * @param n2
+     * @return distance between n1 and n2
+     */
+    private static double getDistance( PhylogenyNode n1, final PhylogenyNode n2 ) {
+        double d = 0.0;
+        while ( n1 != n2 ) {
+            if ( n1.getDistanceToParent() > 0.0 ) {
+                d += n1.getDistanceToParent();
+            }
+            n1 = n1.getParent();
+        }
+        return d;
+    }
+
+    /**
+     * Returns taxonomy t if all external descendants have 
+     * the same taxonomy t, null otherwise.
+     * 
+     */
+    public static Taxonomy getExternalDescendantsTaxonomy( final PhylogenyNode node ) {
+        final List<PhylogenyNode> descs = node.getAllExternalDescendants();
+        Taxonomy tax = null;
+        for( final PhylogenyNode n : descs ) {
+            if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
+                return null;
+            }
+            else if ( tax == null ) {
+                tax = n.getNodeData().getTaxonomy();
+            }
+            else if ( n.getNodeData().getTaxonomy().isEmpty() || !tax.isEqual( n.getNodeData().getTaxonomy() ) ) {
+                return null;
+            }
+        }
+        return tax;
+    }
+
+    public static PhylogenyNode getFurthestDescendant( final PhylogenyNode node ) {
+        final List<PhylogenyNode> children = node.getAllExternalDescendants();
+        PhylogenyNode farthest = null;
+        double longest = -Double.MAX_VALUE;
+        for( final PhylogenyNode child : children ) {
+            if ( PhylogenyMethods.getDistance( child, node ) > longest ) {
+                farthest = child;
+                longest = PhylogenyMethods.getDistance( child, node );
+            }
+        }
+        return farthest;
+    }
+
+    public static PhylogenyMethods getInstance() {
+        if ( PhylogenyMethods._instance == null ) {
+            PhylogenyMethods._instance = new PhylogenyMethods();
+        }
+        return PhylogenyMethods._instance;
+    }
+
+    /**
+     * Returns the largest confidence value found on phy.
+     */
+    static public double getMaximumConfidenceValue( final Phylogeny phy ) {
+        double max = -Double.MAX_VALUE;
+        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+            final double s = PhylogenyMethods.getConfidenceValue( iter.next() );
+            if ( ( s != Confidence.CONFIDENCE_DEFAULT_VALUE ) && ( s > max ) ) {
+                max = s;
+            }
+        }
+        return max;
+    }
+
+    static public int getMinimumDescendentsPerInternalNodes( final Phylogeny phy ) {
+        int min = Integer.MAX_VALUE;
+        int d = 0;
+        PhylogenyNode n;
+        for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
+            n = it.next();
+            if ( n.isInternal() ) {
+                d = n.getNumberOfDescendants();
+                if ( d < min ) {
+                    min = d;
+                }
+            }
+        }
+        return min;
+    }
+
+    /**
+     * Convenience method for display purposes.
+     * Not intended for algorithms.
+     */
+    public static String getSpecies( final PhylogenyNode node ) {
+        if ( !node.getNodeData().isHasTaxonomy() ) {
+            return "";
+        }
+        if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+            return node.getNodeData().getTaxonomy().getTaxonomyCode();
+        }
+        else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
+            return node.getNodeData().getTaxonomy().getScientificName();
+        }
+        else {
+            return node.getNodeData().getTaxonomy().getCommonName();
+        }
+    }
+
+    /**
+     * Returns all Nodes which are connected to external PhylogenyNode n of this
+     * Phylogeny by a path containing only speciation events. We call these
+     * "super orthologs". Nodes are returned as Vector of references to Nodes.
+     * <p>
+     * PRECONDITION: This tree must be binary and rooted, and speciation -
+     * duplication need to be assigned for each of its internal Nodes.
+     * <p>
+     * Returns null if this Phylogeny is empty or if n is internal.
+     * @param n
+     *            external PhylogenyNode whose strictly speciation related Nodes
+     *            are to be returned
+     * @return List of references to all strictly speciation related Nodes of
+     *         PhylogenyNode n of this Phylogeny, null if this Phylogeny is
+     *         empty or if n is internal
+     */
+    public static List<PhylogenyNode> getSuperOrthologousNodes( final PhylogenyNode n ) {
+        // FIXME
+        PhylogenyNode node = n, deepest = null;
+        final List<PhylogenyNode> v = new ArrayList<PhylogenyNode>();
+        if ( !node.isExternal() ) {
+            return null;
+        }
+        while ( !node.isRoot() && !node.getParent().isDuplication() ) {
+            node = node.getParent();
+        }
+        deepest = node;
+        deepest.setIndicatorsToZero();
+        do {
+            if ( !node.isExternal() ) {
+                if ( node.getIndicator() == 0 ) {
+                    node.setIndicator( ( byte ) 1 );
+                    if ( !node.isDuplication() ) {
+                        node = node.getChildNode1();
+                    }
+                }
+                if ( node.getIndicator() == 1 ) {
+                    node.setIndicator( ( byte ) 2 );
+                    if ( !node.isDuplication() ) {
+                        node = node.getChildNode2();
+                    }
+                }
+                if ( ( node != deepest ) && ( node.getIndicator() == 2 ) ) {
+                    node = node.getParent();
+                }
+            }
+            else {
+                if ( node != n ) {
+                    v.add( node );
+                }
+                if ( node != deepest ) {
+                    node = node.getParent();
+                }
+                else {
+                    node.setIndicator( ( byte ) 2 );
+                }
+            }
+        } while ( ( node != deepest ) || ( deepest.getIndicator() != 2 ) );
+        return v;
+    }
+
+    /**
+     * Convenience method for display purposes.
+     * Not intended for algorithms.
+     */
+    public static String getTaxonomyIdentifier( final PhylogenyNode node ) {
+        if ( !node.getNodeData().isHasTaxonomy() || ( node.getNodeData().getTaxonomy().getIdentifier() == null ) ) {
+            return "";
+        }
+        return node.getNodeData().getTaxonomy().getIdentifier().getValue();
+    }
+
+    /**
+     * Returns all Nodes which are connected to external PhylogenyNode n of this
+     * Phylogeny by a path containing, and leading to, only duplication events.
+     * We call these "ultra paralogs". Nodes are returned as Vector of
+     * references to Nodes.
+     * <p>
+     * PRECONDITION: This tree must be binary and rooted, and speciation -
+     * duplication need to be assigned for each of its internal Nodes.
+     * <p>
+     * Returns null if this Phylogeny is empty or if n is internal.
+     * <p>
+     * (Last modified: 10/06/01)
+     * 
+     * @param n
+     *            external PhylogenyNode whose ultra paralogs are to be returned
+     * @return List of references to all ultra paralogs of PhylogenyNode n of
+     *         this Phylogeny, null if this Phylogeny is empty or if n is
+     *         internal
+     */
+    public static List<PhylogenyNode> getUltraParalogousNodes( final PhylogenyNode n ) {
+        // FIXME test me
+        PhylogenyNode node = n;
+        if ( !node.isExternal() ) {
+            return null;
+        }
+        while ( !node.isRoot() && node.getParent().isDuplication() && areAllChildrenDuplications( node.getParent() ) ) {
+            node = node.getParent();
+        }
+        final List<PhylogenyNode> nodes = node.getAllExternalDescendants();
+        nodes.remove( n );
+        return nodes;
+    }
+
+    public static String inferCommonPartOfScientificNameOfDescendants( final PhylogenyNode node ) {
+        final List<PhylogenyNode> descs = node.getDescendants();
+        String sn = null;
+        for( final PhylogenyNode n : descs ) {
+            if ( !n.getNodeData().isHasTaxonomy()
+                    || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
+                return null;
+            }
+            else if ( sn == null ) {
+                sn = n.getNodeData().getTaxonomy().getScientificName().trim();
+            }
+            else {
+                String sn_current = n.getNodeData().getTaxonomy().getScientificName().trim();
+                if ( !sn.equals( sn_current ) ) {
+                    boolean overlap = false;
+                    while ( ( sn.indexOf( ' ' ) >= 0 ) || ( sn_current.indexOf( ' ' ) >= 0 ) ) {
+                        if ( ForesterUtil.countChars( sn, ' ' ) > ForesterUtil.countChars( sn_current, ' ' ) ) {
+                            sn = sn.substring( 0, sn.lastIndexOf( ' ' ) ).trim();
+                        }
+                        else {
+                            sn_current = sn_current.substring( 0, sn_current.lastIndexOf( ' ' ) ).trim();
+                        }
+                        if ( sn.equals( sn_current ) ) {
+                            overlap = true;
+                            break;
+                        }
+                    }
+                    if ( !overlap ) {
+                        return null;
+                    }
+                }
+            }
+        }
+        return sn;
+    }
+
+    public static boolean isHasExternalDescendant( final PhylogenyNode node ) {
+        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
+            if ( node.getChildNode( i ).isExternal() ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /*
+     * This is case insensitive.
+     * 
+     */
+    public synchronized static boolean isTaxonomyHasIdentifierOfGivenProvider( final Taxonomy tax,
+                                                                               final String[] providers ) {
+        if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getProvider() ) ) {
+            final String my_tax_prov = tax.getIdentifier().getProvider();
+            for( final String provider : providers ) {
+                if ( provider.equalsIgnoreCase( my_tax_prov ) ) {
+                    return true;
+                }
+            }
+            return false;
+        }
+        else {
+            return false;
+        }
+    }
+
+    private static boolean match( final String s,
+                                  final String query,
+                                  final boolean case_sensitive,
+                                  final boolean partial ) {
+        if ( ForesterUtil.isEmpty( s ) || ForesterUtil.isEmpty( query ) ) {
+            return false;
+        }
+        String my_s = s.trim();
+        String my_query = query.trim();
+        if ( !case_sensitive ) {
+            my_s = my_s.toLowerCase();
+            my_query = my_query.toLowerCase();
+        }
+        if ( partial ) {
+            return my_s.indexOf( my_query ) >= 0;
+        }
+        else {
+            return my_s.equals( my_query );
+        }
+    }
+
+    public static void midpointRoot( final Phylogeny phylogeny ) {
+        if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
+            return;
+        }
+        final PhylogenyMethods methods = getInstance();
+        final double farthest_d = methods.calculateFurthestDistance( phylogeny );
+        final PhylogenyNode f1 = methods.getFarthestNode1();
+        final PhylogenyNode f2 = methods.getFarthestNode2();
+        if ( farthest_d <= 0.0 ) {
+            return;
+        }
+        double x = farthest_d / 2.0;
+        PhylogenyNode n = f1;
+        if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny
+                .getRoot() ) ) {
+            n = f2;
+        }
+        while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) {
+            x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
+            n = n.getParent();
+        }
+        phylogeny.reRoot( n, x );
+        phylogeny.recalculateNumberOfExternalDescendants( true );
+        final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() );
+        final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() );
+        final double da = getDistance( a, phylogeny.getRoot() );
+        final double db = getDistance( b, phylogeny.getRoot() );
+        if ( Math.abs( da - db ) > 0.000001 ) {
+            throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed:  da="
+                    + da + ",  db=" + db + ",  diff=" + Math.abs( da - db ) );
+        }
+    }
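+
+    // Illustrative usage sketch (assumes a Phylogeny "phy" with meaningful
+    // branch lengths):
+    //
+    //   PhylogenyMethods.midpointRoot( phy );
+    //   // phy is now re-rooted at the midpoint of the longest tip-to-tip path.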
+
+    public static void normalizeBootstrapValues( final Phylogeny phylogeny,
+                                                 final double max_bootstrap_value,
+                                                 final double max_normalized_value ) {
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( node.isInternal() ) {
+                final double confidence = getConfidenceValue( node );
+                if ( confidence != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
+                    if ( confidence >= max_bootstrap_value ) {
+                        setBootstrapConfidence( node, max_normalized_value );
+                    }
+                    else {
+                        setBootstrapConfidence( node, ( confidence * max_normalized_value ) / max_bootstrap_value );
+                    }
+                }
+            }
+        }
+    }
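+
+    // Illustrative usage sketch: rescale bootstrap support given on a 0-100
+    // scale onto 0-1 (assumes a Phylogeny "phy" with bootstrap confidences):
+    //
+    //   PhylogenyMethods.normalizeBootstrapValues( phy, 100.0, 1.0 );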
+
+    public static List<PhylogenyNode> obtainAllNodesAsList( final Phylogeny phy ) {
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        if ( phy.isEmpty() ) {
+            return nodes;
+        }
+        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+            nodes.add( iter.next() );
+        }
+        return nodes;
+    }
+
+    public static void postorderBranchColorAveragingExternalNodeBased( final Phylogeny p ) {
+        for( final PhylogenyNodeIterator iter = p.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            double red = 0.0;
+            double green = 0.0;
+            double blue = 0.0;
+            int n = 0;
+            if ( node.isInternal() ) {
+                for( final PhylogenyNodeIterator iterator = node.iterateChildNodesForward(); iterator.hasNext(); ) {
+                    final PhylogenyNode child_node = iterator.next();
+                    final Color child_color = getBranchColorValue( child_node );
+                    if ( child_color != null ) {
+                        ++n;
+                        red += child_color.getRed();
+                        green += child_color.getGreen();
+                        blue += child_color.getBlue();
+                    }
+                }
+                if ( n > 0 ) {
+                    // Average only if at least one child branch carries a color
+                    // (avoids a division by zero when no colors are set).
+                    setBranchColorValue( node, new Color( ForesterUtil.roundToInt( red / n ), ForesterUtil
+                            .roundToInt( green / n ), ForesterUtil.roundToInt( blue / n ) ) );
+                }
+            }
+        }
+    }
+
+    public static void removeNode( final PhylogenyNode remove_me, final Phylogeny phylogeny ) {
+        if ( remove_me.isRoot() ) {
+            throw new IllegalArgumentException( "ill advised attempt to remove root node" );
+        }
+        if ( remove_me.isExternal() ) {
+            phylogeny.deleteSubtree( remove_me, false );
+        }
+        else {
+            final PhylogenyNode parent = remove_me.getParent();
+            final List<PhylogenyNode> descs = remove_me.getDescendants();
+            parent.removeChildNode( remove_me );
+            for( final PhylogenyNode desc : descs ) {
+                parent.addAsChild( desc );
+                desc.setDistanceToParent( addPhylogenyDistances( remove_me.getDistanceToParent(), desc
+                        .getDistanceToParent() ) );
+            }
+            remove_me.setParent( null );
+            phylogeny.setIdHash( null );
+            phylogeny.externalNodesHaveChanged();
+        }
+    }
+
+    public static List<PhylogenyNode> searchData( final String query,
+                                                  final Phylogeny phy,
+                                                  final boolean case_sensitive,
+                                                  final boolean partial ) {
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        if ( phy.isEmpty() || ( query == null ) ) {
+            return nodes;
+        }
+        if ( ForesterUtil.isEmpty( query ) ) {
+            return nodes;
+        }
+        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            boolean match = false;
+            if ( match( node.getName(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasTaxonomy()
+                    && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasTaxonomy()
+                    && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasTaxonomy()
+                    && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasTaxonomy()
+                    && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
+                    && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
+                              query,
+                              case_sensitive,
+                              partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasTaxonomy() && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
+                final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
+                I: for( final String syn : syns ) {
+                    if ( match( syn, query, case_sensitive, partial ) ) {
+                        match = true;
+                        break I;
+                    }
+                }
+            }
+            else if ( node.getNodeData().isHasSequence()
+                    && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasSequence()
+                    && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getAccession() != null )
+                    && match( node.getNodeData().getSequence().getAccession().getValue(),
+                              query,
+                              case_sensitive,
+                              partial ) ) {
+                match = true;
+            }
+            else if ( node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
+                final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
+                I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
+                    if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break I;
+                    }
+                }
+            }
+            if ( match ) {
+                nodes.add( node );
+            }
+        }
+        return nodes;
+    }
+
+    public static List<PhylogenyNode> searchDataLogicalAnd( final String[] queries,
+                                                            final Phylogeny phy,
+                                                            final boolean case_sensitive,
+                                                            final boolean partial ) {
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        if ( phy.isEmpty() || ( queries == null ) || ( queries.length < 1 ) ) {
+            return nodes;
+        }
+        for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            boolean all_matched = true;
+            for( final String query : queries ) {
+                boolean match = false;
+                if ( ForesterUtil.isEmpty( query ) ) {
+                    continue;
+                }
+                if ( match( node.getName(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasTaxonomy()
+                        && match( node.getNodeData().getTaxonomy().getTaxonomyCode(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasTaxonomy()
+                        && match( node.getNodeData().getTaxonomy().getCommonName(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasTaxonomy()
+                        && match( node.getNodeData().getTaxonomy().getScientificName(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasTaxonomy()
+                        && ( node.getNodeData().getTaxonomy().getIdentifier() != null )
+                        && match( node.getNodeData().getTaxonomy().getIdentifier().getValue(),
+                                  query,
+                                  case_sensitive,
+                                  partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasTaxonomy()
+                        && !node.getNodeData().getTaxonomy().getSynonyms().isEmpty() ) {
+                    final List<String> syns = node.getNodeData().getTaxonomy().getSynonyms();
+                    I: for( final String syn : syns ) {
+                        if ( match( syn, query, case_sensitive, partial ) ) {
+                            match = true;
+                            break I;
+                        }
+                    }
+                }
+                else if ( node.getNodeData().isHasSequence()
+                        && match( node.getNodeData().getSequence().getName(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasSequence()
+                        && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasSequence()
+                        && ( node.getNodeData().getSequence().getAccession() != null )
+                        && match( node.getNodeData().getSequence().getAccession().getValue(),
+                                  query,
+                                  case_sensitive,
+                                  partial ) ) {
+                    match = true;
+                }
+                else if ( node.getNodeData().isHasSequence()
+                        && ( node.getNodeData().getSequence().getDomainArchitecture() != null ) ) {
+                    final DomainArchitecture da = node.getNodeData().getSequence().getDomainArchitecture();
+                    I: for( int i = 0; i < da.getNumberOfDomains(); ++i ) {
+                        if ( match( da.getDomain( i ).getName(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break I;
+                        }
+                    }
+                }
+                if ( !match ) {
+                    all_matched = false;
+                    break;
+                }
+            }
+            if ( all_matched ) {
+                nodes.add( node );
+            }
+        }
+        return nodes;
+    }
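+
+    // Illustrative usage sketch (the query strings are made-up examples; assumes
+    // a Phylogeny "phy" with node names, taxonomies, and/or sequences):
+    //
+    //   final List<PhylogenyNode> hits = PhylogenyMethods.searchData( "bcl2", phy, false, true );
+    //   final List<PhylogenyNode> and_hits = PhylogenyMethods
+    //           .searchDataLogicalAnd( new String[] { "HUMAN", "bcl" }, phy, false, true );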
+
+    /**
+     * Convenience method.
+     * Sets value for the first confidence value (created if not present, values overwritten otherwise). 
+     */
+    public static void setBootstrapConfidence( final PhylogenyNode node, final double bootstrap_confidence_value ) {
+        setConfidence( node, bootstrap_confidence_value, "bootstrap" );
+    }
+
+    public static void setBranchColorValue( final PhylogenyNode node, final Color color ) {
+        if ( node.getBranchData().getBranchColor() == null ) {
+            node.getBranchData().setBranchColor( new BranchColor() );
+        }
+        node.getBranchData().getBranchColor().setValue( color );
+    }
+
+    /**
+     * Convenience method
+     */
+    public static void setBranchWidthValue( final PhylogenyNode node, final double branch_width_value ) {
+        node.getBranchData().setBranchWidth( new BranchWidth( branch_width_value ) );
+    }
+
+    /**
+     * Convenience method.
+     * Sets value for the first confidence value (created if not present, values overwritten otherwise). 
+     */
+    public static void setConfidence( final PhylogenyNode node, final double confidence_value ) {
+        setConfidence( node, confidence_value, "" );
+    }
+
+    /**
+     * Convenience method.
+     * Sets value for the first confidence value (created if not present, values overwritten otherwise). 
+     */
+    public static void setConfidence( final PhylogenyNode node, final double confidence_value, final String type ) {
+        Confidence c = null;
+        if ( node.getBranchData().getNumberOfConfidences() > 0 ) {
+            c = node.getBranchData().getConfidence( 0 );
+        }
+        else {
+            c = new Confidence();
+            node.getBranchData().addConfidence( c );
+        }
+        c.setType( type );
+        c.setValue( confidence_value );
+    }
+
+    public static void setScientificName( final PhylogenyNode node, final String scientific_name ) {
+        if ( !node.getNodeData().isHasTaxonomy() ) {
+            node.getNodeData().setTaxonomy( new Taxonomy() );
+        }
+        node.getNodeData().getTaxonomy().setScientificName( scientific_name );
+    }
+
+    /**
+     * Convenience method to set the taxonomy code of a phylogeny node.
+     * 
+     * 
+     * @param node
+     * @param taxonomy_code
+     */
+    public static void setTaxonomyCode( final PhylogenyNode node, final String taxonomy_code ) {
+        if ( !node.getNodeData().isHasTaxonomy() ) {
+            node.getNodeData().setTaxonomy( new Taxonomy() );
+        }
+        node.getNodeData().getTaxonomy().setTaxonomyCode( taxonomy_code );
+    }
+
+    /**
+     * Removes from Phylogeny to_be_stripped all external Nodes which are
+     * associated with a species NOT found in Phylogeny reference.
+     * 
+     * @param reference
+     *            a reference Phylogeny
+     * @param to_be_stripped
+     *            Phylogeny to be stripped
+     * @return number of external nodes removed from to_be_stripped
+     */
+    public static int taxonomyBasedDeletionOfExternalNodes( final Phylogeny reference, final Phylogeny to_be_stripped ) {
+        final Set<String> ref_ext_taxo = new HashSet<String>();
+        final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNodeIterator it = reference.iteratorExternalForward(); it.hasNext(); ) {
+            ref_ext_taxo.add( getSpecies( it.next() ) );
+        }
+        for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
+            final PhylogenyNode n = it.next();
+            if ( !ref_ext_taxo.contains( getSpecies( n ) ) ) {
+                nodes_to_delete.add( n );
+            }
+        }
+        for( final PhylogenyNode phylogenyNode : nodes_to_delete ) {
+            to_be_stripped.deleteSubtree( phylogenyNode, true );
+        }
+        return nodes_to_delete.size();
+    }
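+
+    // Illustrative usage sketch (assumes a species tree "species_tree" and a gene
+    // tree "gene_tree", both with taxonomy data on their external nodes):
+    //
+    //   final int removed = PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );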
+}
diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java
new file mode 100644 (file)
index 0000000..3858ea9
--- /dev/null
@@ -0,0 +1,1032 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.nhx.NHXFormatException;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.BranchData;
+import org.forester.phylogeny.data.NodeData;
+import org.forester.phylogeny.iterators.ChildNodeIteratorForward;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.phylogeny.iterators.PreorderTreeIterator;
+import org.forester.util.ForesterUtil;
+
+public class PhylogenyNode implements PhylogenyNodeI, Comparable<PhylogenyNode> {
+
+    /** Value of -1024.0 is used as the default (unset) distance value. */
+    public final static double       DISTANCE_DEFAULT = -1024.0;
+    private static int               _node_count      = 0;
+    private byte                     _indicator;
+    private int                      _id;
+    private int                      _sum_ext_nodes;
+    private float                    _x;
+    private float                    _y;
+    private double                   _distance_parent;
+    private boolean                  _collapse;
+    private PhylogenyNode            _parent;
+    private PhylogenyNode            _link;
+    private ArrayList<PhylogenyNode> _descendants;
+    private NodeData                 _node_data;
+    private BranchData               _branch_data;
+    private float                    _x_secondary;
+    private float                    _y_secondary;
+
+    /**
+     * Default constructor for PhylogenyNode.
+     */
+    public PhylogenyNode() {
+        init();
+        setId( PhylogenyNode.getNodeCount() );
+        PhylogenyNode.increaseNodeCount();
+        setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!)
+    }
+
+    public PhylogenyNode( final String nhx ) throws NHXFormatException {
+        this( nhx, ForesterUtil.TAXONOMY_EXTRACTION.NO );
+    }
+
+    public PhylogenyNode( final String nhx, final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction )
+            throws NHXFormatException {
+        init();
+        NHXParser.parseNHX( nhx, this, taxonomy_extraction, false );
+        setId( PhylogenyNode.getNodeCount() );
+        PhylogenyNode.increaseNodeCount();
+        setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!)
+    }
+
+    /**
+     * Constructor for PhylogenyNode.
+     * <p>
+     * 
+     * @param nhx
+     *            String representing one PhylogenyNode in New Hampshire (NH) or
+     *            New Hampshire X (NHX) format.
+     * @throws NHXFormatException
+     * @throws PhylogenyParserException
+     */
+    public PhylogenyNode( final String nhx,
+                          final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction,
+                          final boolean replace_underscores ) throws NHXFormatException {
+        init();
+        NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores );
+        setId( PhylogenyNode.getNodeCount() );
+        PhylogenyNode.increaseNodeCount();
+        setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!)
+    }
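+
+    // Illustrative usage sketch (the NHX string is a made-up example; these
+    // constructors throw NHXFormatException on malformed input):
+    //
+    //   final PhylogenyNode n = new PhylogenyNode( "BCL2_HUMAN:0.1" );
+    //   final PhylogenyNode m = new PhylogenyNode( "BCL2_HUMAN:0.1",
+    //                                              ForesterUtil.TAXONOMY_EXTRACTION.NO,
+    //                                              false );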
+
+    /**
+     * Adds PhylogenyNode node to the list of child nodes and sets the _parent
+     * of node to this.
+     * 
+     * @param node
+     *            the PhylogenyNode to add
+     */
+    final public void addAsChild( final PhylogenyNodeI node ) {
+        final PhylogenyNode n = ( PhylogenyNode ) node;
+        addChildNode( n );
+        n.setParent( this );
+    }
+
+    /**
+     * Adds PhylogenyNode n to the list of child nodes. But does NOT set the
+     * _parent of n to this.
+     * 
+     * @see addAsChild( PhylogenyNode n )
+     * @param n
+     *            the PhylogenyNode to add
+     */
+    final private void addChildNode( final PhylogenyNode child ) {
+        getDescendants().add( child );
+    }
+
+    final public int compareTo( final PhylogenyNode o ) {
+        final PhylogenyNode n = o;
+        if ( ( getName() == null ) || ( n.getName() == null ) ) {
+            return 0;
+        }
+        return getName().compareTo( n.getName() );
+    }
+
+    // ---------------------------------------------------------
+    // Copy and delete Nodes, copy subtrees
+    // ---------------------------------------------------------
+    /**
+     * Returns a new PhylogenyNode which has its data copied from this
+     * PhylogenyNode. Links to the other Nodes in the same Phylogeny are NOT
+     * copied (e.g. _link to _parent). Field "_link" IS copied.
+     * 
+     * @see #getLink() 
+     */
+    final public PhylogenyNode copyNodeData() {
+        final PhylogenyNode node = new PhylogenyNode();
+        PhylogenyNode.decreaseNodeCount();
+        node._id = _id;
+        node._sum_ext_nodes = _sum_ext_nodes;
+        node._indicator = _indicator;
+        node._x = _x;
+        node._y = _y;
+        node._distance_parent = _distance_parent;
+        node._collapse = _collapse;
+        node._link = _link;
+        if ( _node_data != null ) {
+            node._node_data = ( NodeData ) _node_data.copy();
+        }
+        if ( _branch_data != null ) {
+            node._branch_data = ( BranchData ) _branch_data.copy();
+        }
+        return node;
+    }
+
+    /**
+     * Returns a new PhylogenyNode which has the same data as this
+     * PhylogenyNode. Links to the other Nodes in the same Phylogeny are NOT
+     * copied (e.g. _link to _parent). Field "_link" IS copied.
+     * 
+     * @see #getLink() 
+     */
+    final public PhylogenyNode copyNodeDataShallow() {
+        final PhylogenyNode node = new PhylogenyNode();
+        PhylogenyNode.decreaseNodeCount();
+        node._id = _id;
+        node._sum_ext_nodes = _sum_ext_nodes;
+        node._indicator = _indicator;
+        node._x = _x;
+        node._y = _y;
+        node._distance_parent = _distance_parent;
+        node._collapse = _collapse;
+        node._link = _link;
+        node._node_data = _node_data;
+        node._branch_data = _branch_data;
+        return node;
+    }
+
+    /**
+     * Based on node name, sequence, and taxonomy.
+     */
+    @Override
+    final public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            return false;
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            final PhylogenyNode other = ( PhylogenyNode ) o;
+            if ( !getName().equals( other.getName() ) ) {
+                return false;
+            }
+            final NodeData this_data = getNodeData();
+            final NodeData other_data = other.getNodeData();
+            if ( ( this_data.isHasSequence() && other_data.isHasSequence() )
+                    && ( this_data.isHasTaxonomy() && other_data.isHasTaxonomy() ) ) {
+                return ( this_data.getTaxonomy().isEqual( other_data.getTaxonomy() ) && this_data.getSequence()
+                        .isEqual( other_data.getSequence() ) );
+            }
+            else if ( this_data.isHasSequence() && other_data.isHasSequence() ) {
+                return ( this_data.getSequence().isEqual( other_data.getSequence() ) );
+            }
+            else if ( this_data.isHasTaxonomy() && other_data.isHasTaxonomy() ) {
+                return ( this_data.getTaxonomy().isEqual( other_data.getTaxonomy() ) );
+            }
+            else if ( getName().length() > 0 ) {
+                // Node name is not empty, and equal.
+                return true;
+            }
+            else {
+                return false;
+            }
+        }
+    }
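+
+    // Illustrative sketch of the equality contract above (not part of the original
+    // source; assumes the nodes carry neither sequence nor taxonomy data):
+    //
+    //     final PhylogenyNode a = new PhylogenyNode();
+    //     final PhylogenyNode b = new PhylogenyNode();
+    //     a.setName( "BCL2_HUMAN" );
+    //     b.setName( "BCL2_HUMAN" );
+    //     a.equals( b );            // true: names are equal and non-empty
+    //     b.setName( "BCL2_MOUSE" );
+    //     a.equals( b );            // false: names differ
+    //     a.equals( "BCL2_HUMAN" ); // throws IllegalArgumentException: not a PhylogenyNode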
+
+    // ---------------------------------------------------------
+    // Obtaining of Nodes
+    // ---------------------------------------------------------
+    /**
+     * Returns a List containing references to all external descendants of
+     * this PhylogenyNode (or to this PhylogenyNode itself, if it is external).
+     * 
+     * @return List of references to external Nodes
+     */
+    final public List<PhylogenyNode> getAllExternalDescendants() {
+        final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+        if ( isExternal() ) {
+            nodes.add( this );
+            return nodes;
+        }
+        PhylogenyNode node1 = this;
+        while ( !node1.isExternal() ) {
+            node1 = node1.getFirstChildNode();
+        }
+        PhylogenyNode node2 = this;
+        while ( !node2.isExternal() ) {
+            node2 = node2.getLastChildNode();
+        }
+        while ( node1 != node2 ) {
+            nodes.add( node1 );
+            node1 = node1.getNextExternalNode();
+        }
+        nodes.add( node2 );
+        return nodes;
+    }
+
+    /**
+     * Returns a List containing the names of all external descendants of this
+     * PhylogenyNode.
+     * 
+     * @return List of names of external Nodes
+     */
+    final public List<String> getAllExternalDescendantsNames() {
+        final List<PhylogenyNode> c = getAllExternalDescendants();
+        final List<String> n = new ArrayList<String>( c.size() );
+        for( final PhylogenyNode phylogenyNode : c ) {
+            n.add( phylogenyNode.getName() );
+        }
+        return n;
+    }
+
+    final public BranchData getBranchData() {
+        if ( _branch_data == null ) {
+            _branch_data = new BranchData();
+        }
+        return _branch_data;
+    }
+
+    final BranchData getBranchDataDirectly() {
+        return _branch_data;
+    }
+
+    /**
+     * Returns child node i of this node.
+     * 
+     * @param i
+     *            the index of the child to get
+     * @return the child node with index i
+     * @throws IllegalArgumentException
+     *             if i is out of bounds
+     */
+    final public PhylogenyNode getChildNode( final int i ) {
+        if ( isExternal() ) {
+            throw new UnsupportedOperationException( "attempt to get the child node of an external node." );
+        }
+        if ( ( i >= getNumberOfDescendants() ) || ( i < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to get child node " + i + " of a node with "
+                    + getNumberOfDescendants() + " child nodes" );
+        }
+        return getDescendants().get( i );
+    }
+
+    /**
+     * Convenience method. Returns the first child PhylogenyNode of this
+     * PhylogenyNode.
+     */
+    final public PhylogenyNode getChildNode1() {
+        return getChildNode( 0 );
+    }
+
+    /**
+     * Convenience method. Returns the second child PhylogenyNode of this
+     * PhylogenyNode.
+     * <p>
+     * [last modified May 18, 2005 by CMZ]
+     */
+    final public PhylogenyNode getChildNode2() {
+        return getChildNode( 1 );
+    }
+
+    /**
+     * This gets the child node index of this node.
+     * <p>
+     * 
+     * @return the child node index of this node
+     * @throws UnsupportedOperationException
+     *             if this node is a root node
+     */
+    final public int getChildNodeIndex() {
+        return getChildNodeIndex( getParent() );
+    }
+
+    /**
+     * This gets the child node index of this node, given that parent is its
+     * parent
+     * <p>
+     * [last modified Aug 14, 2006 by CMZ]
+     * 
+     * @return the child node index of this node
+     * @throws UnsupportedOperationException
+     *             if this node is a root node
+     */
+    final public int getChildNodeIndex( final PhylogenyNode parent ) {
+        if ( isRoot() ) {
+            throw new UnsupportedOperationException( "Cannot get the child index for a root node." );
+        }
+        for( int i = 0; i < parent.getNumberOfDescendants(); ++i ) {
+            if ( parent.getChildNode( i ) == this ) {
+                return i;
+            }
+        }
+        throw new RuntimeException( "Unexpected exception: Could not determine the child index for node: " + this );
+    }
+
+    final public List<PhylogenyNode> getDescendants() {
+        return _descendants;
+    }
+
+    /**
+     * Returns the length of the branch leading to the _parent of this
+     * PhylogenyNode (double).
+     */
+    final public double getDistanceToParent() {
+        return _distance_parent;
+    }
+
+    /**
+     * Convenience method. Returns the first child node of this node.
+     * <p>
+     * [last modified May 18, 2005 by CMZ]
+     * 
+     * @return the first child node of this node
+     */
+    public final PhylogenyNode getFirstChildNode() {
+        return getChildNode( 0 );
+    }
+
+    /**
+     * Returns the _indicator value of this PhylogenyNode.
+     */
+    public final byte getIndicator() {
+        return _indicator;
+    }
+
+    /**
+     * Convenience method. Returns the last child node of this node.
+     * <p>
+     * [last modified May 18, 2005 by CMZ]
+     * 
+     * @return the last child node of this node
+     */
+    public final PhylogenyNode getLastChildNode() {
+        return getChildNode( getNumberOfDescendants() - 1 );
+    }
+
+    /**
+     * Returns a reference to the linked PhylogenyNode of this PhylogenyNode.
+     * Currently, this method is only used for the speciation-duplication
+     * assignment algorithms.
+     */
+    public final PhylogenyNode getLink() {
+        return _link;
+    }
+
+    /**
+     * Returns a reference to the next external PhylogenyNode of this
+     * PhylogenyNode. TODO should be in Phylogeny. Returns null if no next
+     * external node is available.
+     */
+    public final PhylogenyNode getNextExternalNode() {
+        if ( isInternal() ) {
+            throw new UnsupportedOperationException( "attempt to get next external node of an internal node" );
+        }
+        else if ( isLastExternalNode() ) {
+            return null;
+        }
+        int index = getChildNodeIndex();
+        PhylogenyNode previous_node = this;
+        PhylogenyNode current_node = getParent();
+        while ( !current_node.isRoot()
+                && ( ( current_node.getNumberOfDescendants() == 1 ) || previous_node.isLastChildNode() ) ) {
+            index = current_node.getChildNodeIndex();
+            previous_node = current_node;
+            current_node = current_node.getParent();
+        }
+        current_node = current_node.getChildNode( index + 1 );
+        while ( current_node.isInternal() ) {
+            current_node = current_node.getFirstChildNode();
+        }
+        return current_node;
+    }
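+
+    // Illustrative sketch (not part of the original source): getNextExternalNode() can
+    // be used to visit all external nodes (tips) of a tree from left to right, starting
+    // at the first external node. "root" is assumed to be the root PhylogenyNode.
+    //
+    //     PhylogenyNode tip = root;
+    //     while ( !tip.isExternal() ) {
+    //         tip = tip.getFirstChildNode();
+    //     }
+    //     while ( tip != null ) {
+    //         System.out.println( tip.getName() );
+    //         tip = tip.getNextExternalNode(); // null after the last external node
+    //     }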
+
+    public final NodeData getNodeData() {
+        if ( _node_data == null ) {
+            _node_data = new NodeData();
+        }
+        return _node_data;
+    }
+
+    final NodeData getNodeDataDirectly() {
+        return _node_data;
+    }
+
+    // ---------------------------------------------------------
+    // Set and get methods for Nodes
+    // ---------------------------------------------------------
+    /**
+     * Returns the ID (int) of this PhylogenyNode.
+     */
+    final public int getId() {
+        return _id;
+    }
+
+    /**
+     * Returns the name of this node.
+     */
+    final public String getName() {
+        return getNodeData().getNodeName();
+    }
+
+    final public int getNumberOfDescendants() {
+        return _descendants.size();
+    }
+
+    /**
+     * Returns the total number of external Nodes originating from this
+     * PhylogenyNode (int).
+     */
+    final public int getNumberOfExternalNodes() {
+        return _sum_ext_nodes;
+    }
+
+    final public int getNumberOfParents() {
+        return 1;
+    }
+
+    /**
+     * Returns a reference to the parent PhylogenyNode of this PhylogenyNode.
+     */
+    final public PhylogenyNode getParent() {
+        return _parent;
+    }
+
+    /**
+     * Returns a reference to the previous external PhylogenyNode of this
+     * PhylogenyNode. TODO should be in Phylogeny. Throws an
+     * UnsupportedOperationException if no previous external node is available.
+     */
+    final public PhylogenyNode getPreviousExternalNode() {
+        if ( isInternal() ) {
+            throw new UnsupportedOperationException( "Cannot get the previous external node for an internal node." );
+        }
+        else if ( isRoot() /* TODO && tree is rooted */) {
+            throw new UnsupportedOperationException( "Cannot get the previous external node for a root node." );
+        }
+        else if ( isFirstExternalNode() ) {
+            throw new UnsupportedOperationException( "Attempt to get previous external node of the first external node." );
+        }
+        int index = getChildNodeIndex();
+        PhylogenyNode previous_node = this;
+        PhylogenyNode current_node = getParent();
+        while ( !current_node.isRoot()
+                && ( ( current_node.getNumberOfDescendants() == 1 ) || previous_node.isFirstChildNode() ) ) {
+            index = current_node.getChildNodeIndex();
+            previous_node = current_node;
+            current_node = current_node.getParent();
+        }
+        current_node = current_node.getChildNode( index - 1 );
+        while ( current_node.isInternal() ) {
+            current_node = current_node.getLastChildNode();
+        }
+        return current_node;
+    }
+
+    /**
+     * Used for drawing of Trees.
+     */
+    final public float getXcoord() {
+        return _x;
+    }
+
+    final public float getXSecondary() {
+        return _x_secondary;
+    }
+
+    /**
+     * Used for drawing of Trees.
+     */
+    final public float getYcoord() {
+        return _y;
+    }
+
+    final public float getYSecondary() {
+        return _y_secondary;
+    }
+
+    @Override
+    final public int hashCode() {
+        final NodeData data = getNodeData();
+        if ( ( getName().length() < 1 ) && !data.isHasSequence() && !data.isHasTaxonomy() ) {
+            return super.hashCode();
+        }
+        int result = getName().hashCode();
+        if ( data.isHasSequence() ) {
+            result ^= data.getSequence().hashCode();
+        }
+        if ( data.isHasTaxonomy() ) {
+            result ^= data.getTaxonomy().hashCode();
+        }
+        return result;
+    }
+
+    final private void init() {
+        _descendants = new ArrayList<PhylogenyNode>();
+        _parent = null;
+        _id = 0;
+        initializeData();
+    }
+
+    /**
+     * Deletes data of this PhylogenyNode. Links to the other Nodes in the
+     * Phylogeny, the ID and the sum of external nodes are NOT deleted. The
+     * field "_link" (a link to a Node in another Phylogeny) IS deleted.
+     * 
+     * @see #getLink() (Last modified: 12/20/03)
+     */
+    final public void initializeData() {
+        _indicator = 0;
+        _x = 0;
+        _y = 0;
+        //_node_name = "";
+        _distance_parent = PhylogenyNode.DISTANCE_DEFAULT;
+        _collapse = false;
+        _link = null;
+        _branch_data = null;
+        _node_data = null;
+    }
+
+    /**
+     * Returns whether this PhylogenyNode should be drawn as collapsed.
+     */
+    final public boolean isCollapse() {
+        return _collapse;
+    }
+
+    /**
+     * Returns true if this PhylogenyNode represents a duplication event, false
+     * otherwise.
+     */
+    final public boolean isDuplication() {
+        return getNodeData().isHasEvent() && getNodeData().getEvent().isDuplication();
+    }
+
+    /**
+     * Checks whether this PhylogenyNode is external (tip).
+     * 
+     * @return true if this PhylogenyNode is external, false otherwise
+     */
+    final public boolean isExternal() {
+        return ( getNumberOfDescendants() < 1 );
+    }
+
+    /**
+     * Returns true if this node is the first child node of its _parent.
+     * 
+     * @return true if this node is the first child node of its _parent, false
+     *         otherwise
+     */
+    final public boolean isFirstChildNode() {
+        if ( isRoot() /* and tree is rooted TODO */) {
+            throw new UnsupportedOperationException( "Cannot determine whether the root is the first child node of its _parent." );
+        }
+        return ( getChildNodeIndex() == 0 );
+    }
+
+    /**
+     * Returns true if this node is the first external node of the tree, i.e.
+     * it is external and it and all of its ancestors (except the root) are
+     * first child nodes.
+     * 
+     * @return true if this node is the first external node, false otherwise
+     */
+    final public boolean isFirstExternalNode() {
+        if ( isInternal() ) {
+            return false;
+        }
+        PhylogenyNode node = this;
+        while ( !node.isRoot() ) {
+            if ( !node.isFirstChildNode() ) {
+                return false;
+            }
+            node = node.getParent();
+        }
+        return true;
+    }
+
+    /**
+     * Returns whether a duplication or speciation event has been assigned for
+     * this PhylogenyNode.
+     */
+    final public boolean isHasAssignedEvent() {
+        if ( !getNodeData().isHasEvent() ) {
+            return false;
+        }
+        if ( ( getNodeData().getEvent() ).isUnassigned() ) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Checks whether this PhylogenyNode is internal (i.e. not a tip).
+     * 
+     * @return true if this PhylogenyNode is internal, false otherwise
+     */
+    final public boolean isInternal() {
+        return ( !isExternal() );
+    }
+
+    /**
+     * Returns true if this node is the last child node of its _parent.
+     * <p>
+     * [last modified June 01, 2005 by CMZ]
+     * 
+     * @return true if this node is the last child node of its _parent, false
+     *         otherwise
+     */
+    final public boolean isLastChildNode() {
+        if ( isRoot() /* and tree is rooted TODO */) {
+            throw new UnsupportedOperationException( "Cannot determine whether the root is the last child node of its _parent." );
+        }
+        return ( getChildNodeIndex() == ( getParent().getNumberOfDescendants() - 1 ) );
+    }
+
+    /**
+     * Returns true if this node is the last external node of the tree, i.e.
+     * it is external and it and all of its ancestors (except the root) are
+     * last child nodes.
+     * 
+     * @return true if this node is the last external node, false otherwise
+     */
+    final public boolean isLastExternalNode() {
+        if ( isInternal() ) {
+            return false;
+        }
+        PhylogenyNode node = this;
+        while ( !node.isRoot() ) {
+            if ( !node.isLastChildNode() ) {
+                return false;
+            }
+            node = node.getParent();
+        }
+        return true;
+    }
+
+    /**
+     * Checks whether this PhylogenyNode is a root.
+     * 
+     * @return true if this PhylogenyNode is the root, false otherwise
+     */
+    final public boolean isRoot() {
+        return _parent == null;
+    }
+
+    final public boolean isSpeciation() {
+        return getNodeData().isHasEvent() && getNodeData().getEvent().isSpeciation();
+    }
+
+    // ---------------------------------------------------------
+    // Iterator
+    // ---------------------------------------------------------
+    final public PhylogenyNodeIterator iterateChildNodesForward() {
+        return new ChildNodeIteratorForward( this );
+    }
+
+    // ---------------------------------------------------------
+    // Basic printing
+    // ---------------------------------------------------------
+    /**
+     * Prints to the console the subtree originating from this PhylogenyNode in
+     * preorder.
+     */
+    public void preorderPrint() {
+        System.out.println( this + "\n" );
+        if ( isInternal() ) {
+            for( int i = 0; i < getNumberOfDescendants(); ++i ) {
+                getChildNode( i ).preorderPrint();
+            }
+        }
+    }
+
+    final public void removeChildNode( final int i ) {
+        if ( isExternal() ) {
+            throw new UnsupportedOperationException( "cannot get the child node for a external node." );
+        }
+        if ( ( i >= getNumberOfDescendants() ) || ( i < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to get child node " + i + " of a node with "
+                    + getNumberOfDescendants() + " child nodes." );
+        }
+        getDescendants().remove( i );
+    }
+
+    final public void removeChildNode( final PhylogenyNode remove_me ) {
+        removeChildNode( remove_me.getChildNodeIndex() );
+    }
+
+    final public void setBranchData( final BranchData branch_data ) {
+        _branch_data = branch_data;
+    }
+
+    /**
+     * Sets the first child PhylogenyNode of this PhylogenyNode to n.
+     */
+    final public void setChild1( final PhylogenyNode n ) {
+        setChildNode( 0, n );
+    }
+
+    /**
+     * Sets the second child PhylogenyNode of this PhylogenyNode to n.
+     */
+    final public void setChild2( final PhylogenyNode n ) {
+        setChildNode( 1, n );
+    }
+
+    /**
+     * Sets PhylogenyNode node as the child at position i in the list of child
+     * nodes. This does not allow null slots in the list of child nodes: if i
+     * is larger than the number of child nodes, node is simply appended to the
+     * list, not placed at index i.
+     * 
+     * @param i
+     *            the position at which to set the child
+     * @param node
+     *            the PhylogenyNode to add
+     */
+    final public void setChildNode( final int i, final PhylogenyNode node ) {
+        node.setParent( this );
+        if ( getNumberOfDescendants() <= i ) {
+            addChildNode( node );
+        }
+        else {
+            getDescendants().set( i, node );
+        }
+    }
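+
+    // Illustrative sketch (not part of the original source): setChildNode() never
+    // creates null slots -- an index past the current number of children appends:
+    //
+    //     final PhylogenyNode parent = new PhylogenyNode();
+    //     final PhylogenyNode a = new PhylogenyNode();
+    //     final PhylogenyNode b = new PhylogenyNode();
+    //     parent.setChildNode( 0, a ); // a becomes child 0
+    //     parent.setChildNode( 5, b ); // index 5 > child count, so b is appended as child 1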
+
+    final void setChildNodeOnly( final int i, final PhylogenyNode node ) {
+        if ( getNumberOfDescendants() <= i ) {
+            addChildNode( node );
+        }
+        else {
+            getDescendants().set( i, node );
+        }
+    }
+
+    /**
+     * Sets whether this PhylogenyNode should be drawn as collapsed.
+     */
+    final public void setCollapse( final boolean b ) {
+        _collapse = b;
+    }
+
+    /**
+     * Sets the length of the branch leading to the _parent of this
+     * PhylogenyNode to double d.
+     */
+    final public void setDistanceToParent( final double d ) {
+        _distance_parent = d;
+    }
+
+    /**
+     * Sets the _indicator value of this PhylogenyNode to i.
+     */
+    final public void setIndicator( final byte i ) {
+        _indicator = i;
+    }
+
+    // --------------------------------------------------------------------
+    // Adjust methods (related to Phylogeny construction and
+    // Phylogeny modification)
+    // --------------------------------------------------------------------
+    /**
+     * Sets the indicator of this PhylogenyNode and of all its descendants to zero.
+     */
+    final void setIndicatorsToZero() {
+        for( final PreorderTreeIterator it = new PreorderTreeIterator( this ); it.hasNext(); ) {
+            it.next().setIndicator( ( byte ) 0 );
+        }
+    }
+
+    /**
+     * Sets the linked PhylogenyNode of this PhylogenyNode to n. Currently,
+     * this method is only used for the speciation-duplication assignment
+     * algorithms.
+     */
+    final public void setLink( final PhylogenyNode n ) {
+        _link = n;
+    }
+
+    /**
+     * Sets the name of this node.
+     */
+    final public void setName( final String node_name ) {
+        getNodeData().setNodeName( node_name );
+    }
+
+    /**
+     * Sets the ID of this PhylogenyNode to i. Values lower than getNodeCount()
+     * would violate the uniqueness of node IDs, so this method does not allow
+     * them.
+     */
+    synchronized final protected void setId( final int i ) {
+        if ( i < getNodeCount() ) {
+            throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" );
+        }
+        _id = i;
+    }
+
+    /**
+     * Sets the _parent PhylogenyNode of this PhylogenyNode to n.
+     */
+    final public void setParent( final PhylogenyNode n ) {
+        _parent = n;
+    }
+
+    /**
+     * Sets the total number of external Nodes originating from this
+     * PhylogenyNode to i (int).
+     */
+    final public void setSumExtNodes( final int i ) {
+        if ( i < 0 ) {
+            throw new IllegalArgumentException( "attempt to set sum of external nodes to less than one" );
+        }
+        _sum_ext_nodes = i;
+    }
+
+    /**
+     * Used for drawing of Trees.
+     */
+    final public void setXcoord( final float x ) {
+        _x = x;
+    }
+
+    final public void setXSecondary( final float x_secondary ) {
+        _x_secondary = x_secondary;
+    }
+
+    // -----------
+    /**
+     * Used for drawing of Trees.
+     */
+    final public void setYcoord( final float y ) {
+        _y = y;
+    }
+
+    final public void setYSecondary( final float y_secondary ) {
+        _y_secondary = y_secondary;
+    }
+
+    // ---------------------------------------------------------
+    // Writing of Nodes to Strings
+    // ---------------------------------------------------------
+    final public String toNewHampshire( final boolean simple_nh, final boolean write_distance_to_parent ) {
+        final StringBuilder sb = new StringBuilder();
+        String data = "";
+        if ( !ForesterUtil.isEmpty( getName() ) ) {
+            data = getName();
+        }
+        else if ( getNodeData().isHasTaxonomy() ) {
+            if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+                data = getNodeData().getTaxonomy().getTaxonomyCode();
+            }
+            else if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getScientificName() ) ) {
+                data = getNodeData().getTaxonomy().getScientificName();
+            }
+            else if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getCommonName() ) ) {
+                data = getNodeData().getTaxonomy().getCommonName();
+            }
+            else if ( getNodeData().getTaxonomy().getTaxonomyCode() != null ) {
+                data = getNodeData().getTaxonomy().getTaxonomyCode();
+            }
+        }
+        else if ( getNodeData().isHasSequence() ) {
+            if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getName() ) ) {
+                data = getNodeData().getSequence().getName();
+            }
+        }
+        if ( data.length() > 0 ) {
+            data = ForesterUtil.replaceIllegalNhCharacters( data );
+            if ( simple_nh && ( data.length() > 10 ) ) {
+                data = data.substring( 0, 11 );
+            }
+            if ( ForesterUtil.isContainsParanthesesableNhCharacter( data ) ) {
+                sb.append( '\'' );
+                sb.append( data );
+                sb.append( '\'' );
+            }
+            else {
+                sb.append( data );
+            }
+        }
+        if ( ( getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) && write_distance_to_parent ) {
+            sb.append( ":" );
+            sb.append( getDistanceToParent() );
+        }
+        return sb.toString();
+    }
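+
+    // Illustrative sketch (not part of the original source; assumes "A" contains no
+    // characters that need to be replaced or quoted for New Hampshire output):
+    //
+    //     final PhylogenyNode n = new PhylogenyNode();
+    //     n.setName( "A" );
+    //     n.setDistanceToParent( 0.5 );
+    //     n.toNewHampshire( false, true ); // -> "A:0.5"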
+
+    /**
+     * Converts this PhylogenyNode to a New Hampshire X (NHX) String
+     * representation.
+     */
+    final public String toNewHampshireX() {
+        final StringBuffer sb = new StringBuffer();
+        final StringBuffer s_nhx = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getName() ) ) {
+            final String name = ForesterUtil.replaceIllegalNhCharacters( getName() );
+            if ( ForesterUtil.isContainsParanthesesableNhCharacter( name ) ) {
+                sb.append( '\'' );
+                sb.append( name );
+                sb.append( '\'' );
+            }
+            else {
+                sb.append( name );
+            }
+        }
+        if ( getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+            sb.append( ":" );
+            sb.append( getDistanceToParent() );
+        }
+        if ( getNodeDataDirectly() != null ) {
+            s_nhx.append( getNodeDataDirectly().toNHX() );
+        }
+        if ( getBranchDataDirectly() != null ) {
+            s_nhx.append( getBranchDataDirectly().toNHX() );
+        }
+        if ( s_nhx.length() > 0 ) {
+            sb.append( "[&&NHX" );
+            sb.append( s_nhx );
+            sb.append( "]" );
+        }
+        return sb.toString();
+    }
+
+    @Override
+    final public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        if ( !ForesterUtil.isEmpty( getName() ) ) {
+            sb.append( getName() );
+            sb.append( " " );
+        }
+        sb.append( "[" );
+        sb.append( getId() );
+        sb.append( "]" );
+        return sb.toString();
+    }
+
+    /**
+     * Decreases the total number of all Nodes created so far by one.
+     */
+    final static synchronized void decreaseNodeCount() {
+        --PhylogenyNode._node_count;
+    }
+
+    /**
+     * Returns the total number of all Nodes created so far.
+     * 
+     * @return total number of Nodes (int)
+     */
+    synchronized final public static int getNodeCount() {
+        return PhylogenyNode._node_count;
+    }
+
+    /**
+     * Increases the total number of all Nodes created so far by one.
+     */
+    synchronized final private static void increaseNodeCount() {
+        ++PhylogenyNode._node_count;
+    }
+
+    /**
+     * Sets the total number of all Nodes created so far to i (int).
+     */
+    synchronized final static void setNodeCount( final int i ) {
+        PhylogenyNode._node_count = i;
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNodeI.java b/forester/java/src/org/forester/phylogeny/PhylogenyNodeI.java
new file mode 100644 (file)
index 0000000..3920875
--- /dev/null
@@ -0,0 +1,47 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny;
+
+public interface PhylogenyNodeI {
+
+    public void addAsChild( PhylogenyNodeI node );
+
+    public PhylogenyNode getChildNode( int i );
+
+    public double getDistanceToParent();
+
+    public int getId();
+
+    public String getName();
+
+    public void setDistanceToParent( double d );
+
+    public void setName( String name );
+
+    public void setParent( PhylogenyNode phylogenyNode );
+}
diff --git a/forester/java/src/org/forester/phylogeny/data/Accession.java b/forester/java/src/org/forester/phylogeny/data/Accession.java
new file mode 100644 (file)
index 0000000..295b8b7
--- /dev/null
@@ -0,0 +1,142 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class Accession implements PhylogenyData {
+
+    final String _value;
+    final String _source;
+
+    public Accession( final String value, final String source ) {
+        _value = value;
+        _source = source;
+    }
+
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getValue() );
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getSource() ) ) {
+            sb.append( "[" );
+            sb.append( getSource() );
+            sb.append( "] " );
+        }
+        sb.append( getValue() );
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        return new Accession( new String( getValue() ), new String( getSource() ) );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            return false;
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return isEqual( ( Accession ) o );
+        }
+    }
+
+    public String getSource() {
+        return _source;
+    }
+
+    public String getValue() {
+        return _value;
+    }
+
+    @Override
+    public int hashCode() {
+        if ( getSource() != null ) {
+            return ( getSource() + getValue() ).hashCode();
+        }
+        return getValue().hashCode();
+    }
+
+    public boolean isEqual( final PhylogenyData data ) {
+        if ( this == data ) {
+            return true;
+        }
+        if ( ( data == null ) || ( getValue() == null ) ) {
+            return false;
+        }
+        final Accession a = ( Accession ) data;
+        if ( ( getSource() != null ) && ( a.getSource() != null ) ) {
+            return ( a.getValue().equals( getValue() ) && a.getSource().equals( getSource() ) );
+        }
+        return ( a.getValue().equals( getValue() ) );
+    }
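+
+    // Illustrative sketch (not part of the original source): the source is only taken
+    // into account when it is set on both accessions ("UniProtKB" and the accession
+    // values below are just example data):
+    //
+    //     new Accession( "Q9BYF1", "UniProtKB" ).isEqual( new Accession( "Q9BYF1", "UniProtKB" ) ); // true
+    //     new Accession( "Q9BYF1", null ).isEqual( new Accession( "Q9BYF1", "UniProtKB" ) );        // true, source ignored
+    //     new Accession( "Q9BYF1", "UniProtKB" ).isEqual( new Accession( "P12345", "UniProtKB" ) ); // false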
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( ":" );
+        sb.append( NHXtags.SEQUENCE_ACCESSION );
+        sb.append( ForesterUtil.replaceIllegalNhxCharacters( getValue() ) );
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( ForesterUtil.isEmpty( getSource() ) ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.ACCESSION,
+                                             getValue(),
+                                             PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
+                                             "unknown",
+                                             indentation );
+        }
+        else {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.ACCESSION,
+                                             getValue(),
+                                             PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
+                                             getSource(),
+                                             indentation );
+        }
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/data/Annotation.java b/forester/java/src/org/forester/phylogeny/data/Annotation.java
new file mode 100644 (file)
index 0000000..cba8b55
--- /dev/null
@@ -0,0 +1,282 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class Annotation implements PhylogenyData, MultipleUris, Comparable<Annotation> {
+
+    private String        _desc;
+    private String        _type;
+    private String        _source;
+    private final String  _ref;
+    private String        _evidence;
+    private Confidence    _confidence;
+    private PropertiesMap _properties;
+    private List<Uri>     _uris;
+
+    public Annotation( final String ref ) {
+        if ( ForesterUtil.isEmpty( ref ) ) {
+            throw new IllegalArgumentException( "illegal attempt to create Annotation with null or empty reference" );
+        }
+        if ( ( ref.indexOf( ':' ) < 1 ) || ( ref.length() < 3 ) ) {
+            throw new IllegalArgumentException( "illegal format for Annotation reference: [" + ref + "]" );
+        }
+        _ref = ref;
+        init();
+    }
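+
+    // Illustrative sketch (not part of the original source): the reference must contain
+    // a ':' separating a non-empty prefix from a value, e.g. "GO:0005737":
+    //
+    //     new Annotation( "GO:0005737" ); // ok
+    //     new Annotation( ":0005737" );   // throws IllegalArgumentException (illegal format)
+    //     new Annotation( "" );           // throws IllegalArgumentException (empty reference)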
+
+    @Override
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getDesc() );
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return new StringBuffer( getDesc() );
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        final Annotation ann = new Annotation( new String( getRef() ) );
+        if ( getConfidence() != null ) {
+            ann.setConfidence( ( Confidence ) getConfidence().copy() );
+        }
+        else {
+            ann.setConfidence( null );
+        }
+        ann.setType( new String( getType() ) );
+        ann.setDesc( new String( getDesc() ) );
+        ann.setEvidence( new String( getEvidence() ) );
+        ann.setSource( new String( getSource() ) );
+        if ( getProperties() != null ) {
+            ann.setProperties( ( PropertiesMap ) getProperties().copy() );
+        }
+        else {
+            ann.setProperties( null );
+        }
+        if ( getUris() != null ) {
+            ann.setUris( new ArrayList<Uri>() );
+            for( final Uri uri : getUris() ) {
+                if ( uri != null ) {
+                    ann.getUris().add( uri );
+                }
+            }
+        }
+        return ann;
+    }
+
+    public Confidence getConfidence() {
+        return _confidence;
+    }
+
+    public String getDesc() {
+        return _desc;
+    }
+
+    public String getEvidence() {
+        return _evidence;
+    }
+
+    public PropertiesMap getProperties() {
+        return _properties;
+    }
+
+    public String getRef() {
+        return _ref;
+    }
+
+    public String getSource() {
+        return _source;
+    }
+
+    public String getType() {
+        return _type;
+    }
+
+    private void init() {
+        _desc = "";
+        _type = "";
+        _source = "";
+        _evidence = "";
+        _confidence = null;
+        _properties = null;
+        setUris( null );
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        final Annotation other = ( Annotation ) data;
+        return getDesc().equalsIgnoreCase( other.getDesc() ) && getType().equals( other.getType() )
+                && getSource().equals( other.getSource() ) && getRef().equals( other.getRef() );
+    }
+
+    public void setConfidence( final Confidence confidence ) {
+        _confidence = confidence;
+    }
+
+    public void setDesc( final String desc ) {
+        _desc = desc;
+    }
+
+    public void setEvidence( final String evidence ) {
+        _evidence = evidence;
+    }
+
+    public void setProperties( final PropertiesMap property ) {
+        _properties = property;
+    }
+
+    // public void setRef( final String ref ) {
+    //     _ref = ref;
+    // }
+    public void setSource( final String source ) {
+        _source = source;
+    }
+
+    public void setType( final String type ) {
+        _type = type;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( ( getConfidence() != null ) || ( getProperties() != null )
+                || ( ( getUris() != null ) && !getUris().isEmpty() ) || !ForesterUtil.isEmpty( getDesc() ) ) {
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( indentation );
+            PhylogenyDataUtil.appendOpen( writer,
+                                          PhyloXmlMapping.ANNOTATION,
+                                          PhyloXmlMapping.ANNOTATION_REF_ATTR,
+                                          getRef(),
+                                          PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR,
+                                          getEvidence(),
+                                          PhyloXmlMapping.ANNOTATION_TYPE_ATTR,
+                                          getType(),
+                                          PhyloXmlMapping.ANNOTATION_SOURCE_ATTR,
+                                          getSource() );
+            if ( !ForesterUtil.isEmpty( getDesc() ) ) {
+                PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.ANNOTATION_DESC, getDesc(), indentation );
+            }
+            if ( getConfidence() != null ) {
+                getConfidence().toPhyloXML( writer, level, indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+            }
+            if ( getProperties() != null ) {
+                getProperties().toPhyloXML( writer, level, indentation );
+            }
+            if ( getUris() != null ) {
+                for( final Uri uri : getUris() ) {
+                    if ( uri != null ) {
+                        uri.toPhyloXML( writer, level, indentation );
+                    }
+                }
+            }
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( indentation );
+            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.ANNOTATION );
+        }
+        else {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.ANNOTATION,
+                                             PhyloXmlMapping.ANNOTATION_REF_ATTR,
+                                             getRef(),
+                                             PhyloXmlMapping.ANNOTATION_EVIDENCE_ATTR,
+                                             getEvidence(),
+                                             PhyloXmlMapping.ANNOTATION_TYPE_ATTR,
+                                             getType(),
+                                             PhyloXmlMapping.ANNOTATION_SOURCE_ATTR,
+                                             getSource(),
+                                             indentation );
+        }
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+
+    @Override
+    public void addUri( final Uri uri ) {
+        if ( getUris() == null ) {
+            setUris( new ArrayList<Uri>() );
+        }
+        getUris().add( uri );
+    }
+
+    @Override
+    public Uri getUri( final int index ) {
+        return getUris().get( index );
+    }
+
+    @Override
+    public List<Uri> getUris() {
+        return _uris;
+    }
+
+    @Override
+    public void setUris( final List<Uri> uris ) {
+        _uris = uris;
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            return false;
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return isEqual( ( Annotation ) o );
+        }
+    }
+
+    @Override
+    public int compareTo( final Annotation o ) {
+        if ( equals( o ) ) {
+            return 0;
+        }
+        if ( getRef().equals( o.getRef() ) ) {
+            return getDesc().compareTo( o.getDesc() );
+        }
+        return getRef().compareTo( o.getRef() );
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/data/BinaryCharacters.java b/forester/java/src/org/forester/phylogeny/data/BinaryCharacters.java
new file mode 100644 (file)
index 0000000..4b3bdd1
--- /dev/null
@@ -0,0 +1,319 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class BinaryCharacters implements PhylogenyData {
+
+    public final static int         COUNT_DEFAULT = -1;
+    private final SortedSet<String> _present;
+    private final SortedSet<String> _gained;
+    private final SortedSet<String> _lost;
+    private final int               _present_count;
+    private final int               _gained_count;
+    private final int               _lost_count;
+    private String                  _type;
+
+    public BinaryCharacters() {
+        _present = new TreeSet<String>();
+        _gained = new TreeSet<String>();
+        _lost = new TreeSet<String>();
+        _present_count = COUNT_DEFAULT;
+        _gained_count = COUNT_DEFAULT;
+        _lost_count = COUNT_DEFAULT;
+    }
+
+    public BinaryCharacters( final SortedSet<String> present_characters,
+                             final SortedSet<String> gained_characters,
+                             final SortedSet<String> lost_characters,
+                             final String type ) {
+        _present = present_characters;
+        _gained = gained_characters;
+        _lost = lost_characters;
+        _type = type;
+        _present_count = COUNT_DEFAULT;
+        _gained_count = COUNT_DEFAULT;
+        _lost_count = COUNT_DEFAULT;
+    }
+
+    public BinaryCharacters( final SortedSet<String> present_characters,
+                             final SortedSet<String> gained_characters,
+                             final SortedSet<String> lost_characters,
+                             final String type,
+                             final int present_count,
+                             final int gained_count,
+                             final int lost_count ) {
+        _present = present_characters;
+        _gained = gained_characters;
+        _lost = lost_characters;
+        _type = type;
+        _present_count = present_count;
+        _gained_count = gained_count;
+        _lost_count = lost_count;
+        validate();
+    }
+
+    private void addCharacters( final String indentation, final Writer w, final String[] present ) throws IOException {
+        for( final String string : present ) {
+            PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.BINARY_CHARACTER, string, indentation );
+        }
+    }
+
+    public void addGainedCharacter( final String binary_character ) {
+        if ( getLostCharacters().contains( binary_character ) ) {
+            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
+                    + "] to gained characters but is already listed as lost" );
+        }
+        getGainedCharacters().add( binary_character );
+    }
+
+    public void addLostCharacter( final String binary_character ) {
+        if ( getPresentCharacters().contains( binary_character ) ) {
+            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
+                    + "] to lost characters but is already listed as present" );
+        }
+        if ( getGainedCharacters().contains( binary_character ) ) {
+            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
+                    + "] to lost characters but is already listed as gained" );
+        }
+        getLostCharacters().add( binary_character );
+    }
+
+    public void addPresentCharacter( final String binary_character ) {
+        if ( getLostCharacters().contains( binary_character ) ) {
+            throw new IllegalArgumentException( "attempt to add binary character [" + binary_character
+                    + "] to present characters but is already listed as lost" );
+        }
+        getPresentCharacters().add( binary_character );
+    }
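+
+    // Illustrative sketch (not part of the original source; "PF00069" is just an example
+    // character name): a character cannot be listed as both gained and lost:
+    //
+    //     final BinaryCharacters bc = new BinaryCharacters();
+    //     bc.addGainedCharacter( "PF00069" );
+    //     bc.addLostCharacter( "PF00069" ); // throws IllegalArgumentException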
+
+    @Override
+    public StringBuffer asSimpleText() {
+        return asText();
+    }
+
+    @Override
+    public StringBuffer asText() {
+        validate();
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "present [" );
+        sb.append( getPresentCount() );
+        sb.append( "]: " );
+        sb.append( getPresentCharactersAsStringBuffer() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        sb.append( "gained  [ " );
+        sb.append( getGainedCount() );
+        sb.append( "]: " );
+        sb.append( getGainedCharactersAsStringBuffer() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        sb.append( "lost    [" );
+        sb.append( getLostCount() );
+        sb.append( "]: " );
+        sb.append( getLostCharactersAsStringBuffer() );
+        return sb;
+    }
+
+    @Override
+    /**
+     * Not a deep copy.
+     * 
+     */
+    public PhylogenyData copy() {
+        validate();
+        return new BinaryCharacters( getPresentCharacters(),
+                                     getGainedCharacters(),
+                                     getLostCharacters(),
+                                     getType(),
+                                     getPresentCount(),
+                                     getGainedCount(),
+                                     getLostCount() );
+    }
+
+    public SortedSet<String> getGainedCharacters() {
+        return _gained;
+    }
+
+    public String[] getGainedCharactersAsStringArray() {
+        return sortedSetToStringArray( getGainedCharacters() );
+    }
+
+    public StringBuffer getGainedCharactersAsStringBuffer() {
+        return sortedSetToStringBuffer( getGainedCharacters(), " " );
+    }
+
+    public int getGainedCount() {
+        return _gained_count;
+    }
+
+    public SortedSet<String> getLostCharacters() {
+        return _lost;
+    }
+
+    public String[] getLostCharactersAsStringArray() {
+        return sortedSetToStringArray( getLostCharacters() );
+    }
+
+    public StringBuffer getLostCharactersAsStringBuffer() {
+        return sortedSetToStringBuffer( getLostCharacters(), " " );
+    }
+
+    public int getLostCount() {
+        return _lost_count;
+    }
+
+    public SortedSet<String> getPresentCharacters() {
+        return _present;
+    }
+
+    public String[] getPresentCharactersAsStringArray() {
+        return sortedSetToStringArray( getPresentCharacters() );
+    }
+
+    public StringBuffer getPresentCharactersAsStringBuffer() {
+        return sortedSetToStringBuffer( getPresentCharacters(), " " );
+    }
+
+    public int getPresentCount() {
+        return _present_count;
+    }
+
+    public String getType() {
+        return _type;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    public void setType( final String type ) {
+        _type = type;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        validate();
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer,
+                                      PhyloXmlMapping.BINARY_CHARACTERS,
+                                      PhyloXmlMapping.BINARY_CHARACTERS_TYPE_ATTR,
+                                      getType(),
+                                      PhyloXmlMapping.BINARY_CHARACTERS_GAINED_COUNT_ATTR,
+                                      getGainedCount() != COUNT_DEFAULT ? String.valueOf( getGainedCount() ) : "",
+                                      PhyloXmlMapping.BINARY_CHARACTERS_LOST_COUNT_ATTR,
+                                      getLostCount() != COUNT_DEFAULT ? String.valueOf( getLostCount() ) : "",
+                                      PhyloXmlMapping.BINARY_CHARACTERS_PRESENT_COUNT_ATTR,
+                                      getPresentCount() != COUNT_DEFAULT ? String.valueOf( getPresentCount() ) : "" );
+        final String my_ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
+        if ( getGainedCharacters().size() > 0 ) {
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.BINARY_CHARACTERS_GAINED );
+            addCharacters( my_ind, writer, getGainedCharactersAsStringArray() );
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.BINARY_CHARACTERS_GAINED );
+        }
+        if ( getLostCharacters().size() > 0 ) {
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.BINARY_CHARACTERS_LOST );
+            addCharacters( my_ind, writer, getLostCharactersAsStringArray() );
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.BINARY_CHARACTERS_LOST );
+        }
+        if ( getPresentCharacters().size() > 0 ) {
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.BINARY_CHARACTERS_PRESENT );
+            addCharacters( my_ind, writer, getPresentCharactersAsStringArray() );
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.BINARY_CHARACTERS_PRESENT );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.BINARY_CHARACTERS );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+
+    private void validate() {
+        if ( ( getPresentCount() != COUNT_DEFAULT ) && ( getPresentCharacters().size() > 0 )
+                && ( getPresentCount() != getPresentCharacters().size() ) ) {
+            throw new RuntimeException( "present characters size and count are unequal" );
+        }
+        if ( ( getGainedCount() != COUNT_DEFAULT ) && ( getGainedCharacters().size() > 0 )
+                && ( getGainedCount() != getGainedCharacters().size() ) ) {
+            throw new RuntimeException( "gained characters size and count are unequal" );
+        }
+        if ( ( getLostCount() != COUNT_DEFAULT ) && ( getLostCharacters().size() > 0 )
+                && ( getLostCount() != getLostCharacters().size() ) ) {
+            throw new RuntimeException( "lost characters size and count are unequal" );
+        }
+    }
+
+    private static String[] sortedSetToStringArray( final SortedSet<String> set ) {
+        final String[] chars = new String[ set.size() ];
+        final Iterator<String> it = set.iterator();
+        int i = 0;
+        while ( it.hasNext() ) {
+            chars[ i++ ] = it.next();
+        }
+        return chars;
+    }
+
+    private static StringBuffer sortedSetToStringBuffer( final SortedSet<String> set, final String separator ) {
+        final StringBuffer sb = new StringBuffer();
+        final Iterator<String> it = set.iterator();
+        while ( it.hasNext() ) {
+            sb.append( it.next() );
+            if ( it.hasNext() ) {
+                sb.append( separator );
+            }
+        }
+        return sb;
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/data/BranchColor.java b/forester/java/src/org/forester/phylogeny/data/BranchColor.java
new file mode 100644 (file)
index 0000000..84a356b
--- /dev/null
@@ -0,0 +1,111 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.awt.Color;
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class BranchColor implements PhylogenyData {
+
+    private Color _color;
+
+    public BranchColor() {
+        _color = null;
+    }
+
+    public BranchColor( final Color color ) {
+        _color = color;
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getValue().toString() );
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return new StringBuffer( getValue().toString() );
+    }
+
+    /**
+     * Not a deep copy.
+     */
+    @Override
+    public PhylogenyData copy() {
+        final BranchColor bc = new BranchColor();
+        bc.setValue( getValue() );
+        return bc;
+    }
+
+    public Color getValue() {
+        return _color;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        return getValue().equals( ( ( BranchColor ) data ).getValue() );
+    }
+
+    public void setValue( final Color color ) {
+        _color = color;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( NHXtags.COLOR );
+        sb.append( getValue().getRed() );
+        sb.append( "." );
+        sb.append( getValue().getGreen() );
+        sb.append( "." );
+        sb.append( getValue().getBlue() );
+        return sb;
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.COLOR );
+        PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.COLOR_RED, getValue().getRed() + "", indentation );
+        PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.COLOR_GREEN, getValue().getGreen() + "", indentation );
+        PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.COLOR_BLUE, getValue().getBlue() + "", indentation );
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.COLOR );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
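
A minimal usage sketch for BranchColor, assuming only the class added above; the demo class name and the sample color are illustrative, not part of the library. It wraps a java.awt.Color and prints the NHX and plain-text renderings.

    import java.awt.Color;

    import org.forester.phylogeny.data.BranchColor;

    public class BranchColorDemo {

        public static void main( final String[] args ) {
            // BranchColor stores the Color reference directly; copy() is documented above as not deep
            final BranchColor bc = new BranchColor( new Color( 0, 128, 255 ) );
            System.out.println( bc.toNHX() );  // NHX color tag followed by red.green.blue
            System.out.println( bc.asText() ); // java.awt.Color's own toString()
        }
    }
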
diff --git a/forester/java/src/org/forester/phylogeny/data/BranchData.java b/forester/java/src/org/forester/phylogeny/data/BranchData.java
new file mode 100644 (file)
index 0000000..34a9dc0
--- /dev/null
@@ -0,0 +1,156 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+public class BranchData implements PhylogenyData {
+
+    private BranchColor      _branch_color;
+    private List<Confidence> _confidences;
+    private BranchWidth      _branch_width;
+
+    public BranchData() {
+        // Doing nothing. 
+    }
+
+    public void addConfidence( final Confidence confidence ) {
+        getConfidences().add( confidence );
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public StringBuffer asText() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        final BranchData new_bd = new BranchData();
+        if ( isHasBranchColor() ) {
+            new_bd.setBranchColor( ( BranchColor ) getBranchColor().copy() );
+        }
+        if ( isHasBranchWidth() ) {
+            new_bd.setBranchWidth( ( BranchWidth ) getBranchWidth().copy() );
+        }
+        if ( isHasConfidences() ) {
+            for( final Confidence confidence : getConfidences() ) {
+                new_bd.addConfidence( ( Confidence ) confidence.copy() );
+            }
+        }
+        return new_bd;
+    }
+
+    public BranchColor getBranchColor() {
+        return _branch_color;
+    }
+
+    public BranchWidth getBranchWidth() {
+        return _branch_width;
+    }
+
+    public Confidence getConfidence( final int index ) {
+        return getConfidences().get( index );
+    }
+
+    public List<Confidence> getConfidences() {
+        if ( _confidences == null ) {
+            _confidences = new ArrayList<Confidence>();
+        }
+        return _confidences;
+    }
+
+    public int getNumberOfConfidences() {
+        return getConfidences().size();
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    public boolean isHasBranchColor() {
+        return getBranchColor() != null;
+    }
+
+    public boolean isHasBranchWidth() {
+        return getBranchWidth() != null;
+    }
+
+    public boolean isHasConfidences() {
+        return getNumberOfConfidences() > 0;
+    }
+
+    public void setBranchColor( final BranchColor branch_color ) {
+        _branch_color = branch_color;
+    }
+
+    public void setBranchWidth( final BranchWidth branch_width ) {
+        _branch_width = branch_width;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        if ( isHasConfidences() && ( getConfidence( 0 ).getValue() != Confidence.CONFIDENCE_DEFAULT_VALUE ) ) {
+            sb.append( ":" );
+            sb.append( getConfidence( 0 ).toNHX() );
+        }
+        if ( isHasBranchWidth() && ( getBranchWidth().getValue() != BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE ) ) {
+            sb.append( ":" );
+            sb.append( getBranchWidth().toNHX() );
+        }
+        if ( isHasBranchColor() && ( getBranchColor().getValue() != null ) ) {
+            sb.append( ":" );
+            sb.append( getBranchColor().toNHX() );
+        }
+        return sb;
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isHasConfidences() ) {
+            for( final Confidence confidence : getConfidences() ) {
+                confidence.toPhyloXML( writer, level, indentation );
+            }
+        }
+        if ( isHasBranchWidth() ) {
+            getBranchWidth().toPhyloXML( writer, level, indentation );
+        }
+        if ( isHasBranchColor() ) {
+            getBranchColor().toPhyloXML( writer, level, indentation );
+        }
+    }
+}
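
BranchData aggregates the optional per-branch items (confidences, width, color). A small sketch with illustrative values, showing how the isHas*() accessors reflect what was actually set:

    import org.forester.phylogeny.data.BranchData;
    import org.forester.phylogeny.data.BranchWidth;
    import org.forester.phylogeny.data.Confidence;

    public class BranchDataDemo {

        public static void main( final String[] args ) {
            final BranchData bd = new BranchData();
            bd.addConfidence( new Confidence( 95.0, "bootstrap" ) );
            bd.setBranchWidth( new BranchWidth( 2.0 ) );
            System.out.println( bd.isHasConfidences() ); // true
            System.out.println( bd.isHasBranchColor() ); // false: no color was set
            System.out.println( bd.toNHX() );            // confidence and width, each prefixed by ':'
        }
    }
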
diff --git a/forester/java/src/org/forester/phylogeny/data/BranchWidth.java b/forester/java/src/org/forester/phylogeny/data/BranchWidth.java
new file mode 100644 (file)
index 0000000..803fbda
--- /dev/null
@@ -0,0 +1,91 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class BranchWidth implements PhylogenyData {
+
+    public final static double BRANCH_WIDTH_DEFAULT_VALUE = 1.0;
+    private final double       _value;
+
+    public BranchWidth() {
+        _value = BRANCH_WIDTH_DEFAULT_VALUE;
+    }
+
+    public BranchWidth( final double value ) {
+        _value = value;
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getValue() + "" );
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        return new BranchWidth( getValue() );
+    }
+
+    public double getValue() {
+        return _value;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        return getValue() == ( ( BranchWidth ) data ).getValue();
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( NHXtags.PARENT_BRANCH_WIDTH );
+        sb.append( getValue() );
+        return sb;
+    }
+
+    @Override
+    public void toPhyloXML( final Writer w, final int level, final String indentation ) throws IOException {
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( indentation );
+        PhylogenyDataUtil.appendElement( w, PhyloXmlMapping.WIDTH, getValue() + "" );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
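
BranchWidth is immutable: the value is fixed at construction and copy() returns a new instance with the same value. A short sketch (values and demo class name illustrative):

    import org.forester.phylogeny.data.BranchWidth;

    public class BranchWidthDemo {

        public static void main( final String[] args ) {
            final BranchWidth wide = new BranchWidth( 3.5 );
            final BranchWidth def = new BranchWidth(); // defaults to BRANCH_WIDTH_DEFAULT_VALUE (1.0)
            System.out.println( wide.isEqual( wide.copy() ) ); // true: equality is by value
            System.out.println( wide.isEqual( def ) );         // false
        }
    }
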
diff --git a/forester/java/src/org/forester/phylogeny/data/Confidence.java b/forester/java/src/org/forester/phylogeny/data/Confidence.java
new file mode 100644 (file)
index 0000000..dbb9d07
--- /dev/null
@@ -0,0 +1,142 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.util.ForesterUtil;
+
+public class Confidence implements PhylogenyData, Comparable<Confidence> {
+
+    public final static double CONFIDENCE_DEFAULT_VALUE = -9999.0;
+    private double             _value;
+    private String             _type;
+
+    public Confidence() {
+        init();
+    }
+
+    public Confidence( final double value, final String type ) {
+        setValue( value );
+        setType( type );
+    }
+
+    public StringBuffer asSimpleText() {
+        return new StringBuffer().append( ForesterUtil.FORMATTER_6.format( getValue() ) );
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getType() ) ) {
+            sb.append( "[" );
+            sb.append( getType() );
+            sb.append( "] " );
+        }
+        sb.append( ForesterUtil.FORMATTER_6.format( getValue() ) );
+        return sb;
+    }
+
+    @Override
+    public int compareTo( final Confidence confidence ) {
+        if ( this == confidence ) {
+            return 0;
+        }
+        return getType().compareToIgnoreCase( confidence.getType() );
+    }
+
+    public PhylogenyData copy() {
+        return new Confidence( getValue(), getType() );
+    }
+
+    public String getType() {
+        return _type;
+    }
+
+    public double getValue() {
+        return _value;
+    }
+
+    public void init() {
+        setValue( CONFIDENCE_DEFAULT_VALUE );
+        setType( "" );
+    }
+
+    public boolean isEqual( final PhylogenyData confidence ) {
+        if ( confidence == null ) {
+            return false;
+        }
+        if ( !( confidence instanceof Confidence ) ) {
+            return false;
+        }
+        final Confidence s = ( Confidence ) confidence;
+        if ( s.getValue() != getValue() ) {
+            return false;
+        }
+        if ( !s.getType().equals( getType() ) ) {
+            return false;
+        }
+        return true;
+    }
+
+    public void setType( final String type ) {
+        _type = type;
+    }
+
+    public void setValue( final double value ) {
+        _value = value;
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( NHXtags.SUPPORT );
+        sb.append( getValue() );
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( getValue() == CONFIDENCE_DEFAULT_VALUE ) {
+            return;
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendElement( writer,
+                                         PhyloXmlMapping.CONFIDENCE,
+                                         String.valueOf( ForesterUtil
+                                                 .round( getValue(),
+                                                         PhyloXmlUtil.ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT ) ),
+                                         PhyloXmlMapping.CONFIDENCE_TYPE_ATTR,
+                                         ForesterUtil.isEmpty( getType() ) ? "unknown" : getType() );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
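
A brief sketch of Confidence (values and demo class name are illustrative). toPhyloXML() silently skips a confidence still at CONFIDENCE_DEFAULT_VALUE, and asText() prefixes the type in square brackets:

    import org.forester.phylogeny.data.Confidence;

    public class ConfidenceDemo {

        public static void main( final String[] args ) {
            final Confidence c = new Confidence( 87.0, "bootstrap" );
            System.out.println( c.asText() );       // "[bootstrap] " followed by the formatted value
            System.out.println( c.asSimpleText() ); // the formatted value only
            // a default-constructed Confidence carries the sentinel value and is not written to phyloXML
            System.out.println( new Confidence().getValue() == Confidence.CONFIDENCE_DEFAULT_VALUE ); // true
        }
    }
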
diff --git a/forester/java/src/org/forester/phylogeny/data/Date.java b/forester/java/src/org/forester/phylogeny/data/Date.java
new file mode 100644 (file)
index 0000000..6e2b814
--- /dev/null
@@ -0,0 +1,188 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.math.BigDecimal;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class Date implements PhylogenyData {
+
+    private String     _desc;
+    private BigDecimal _value;
+    private BigDecimal _min;
+    private BigDecimal _max;
+    private String     _unit;
+
+    public Date() {
+        _desc = "";
+        _value = null;
+        _min = null;
+        _max = null;
+        _unit = "";
+    }
+
+    public Date( final String desc ) {
+        if ( desc == null ) {
+            throw new IllegalArgumentException( "illegally empty or null fields in constructor" );
+        }
+        _desc = desc;
+        _value = null;
+        _min = null;
+        _max = null;
+        _unit = "";
+    }
+
+    public Date( final String desc,
+                 final BigDecimal value,
+                 final BigDecimal min,
+                 final BigDecimal max,
+                 final String unit ) {
+        if ( ( desc == null ) || ( unit == null ) ) {
+            throw new IllegalArgumentException( "illegally empty or null fields in constructor" );
+        }
+        _desc = desc;
+        _value = value;
+        _min = min;
+        _max = max;
+        _unit = unit;
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        if ( getValue() != null ) {
+            return new StringBuffer( getDesc() + " [" + getValue().toPlainString() + " " + getUnit() + "]" );
+        }
+        else {
+            return new StringBuffer( getDesc() );
+        }
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        return new Date( getDesc(),
+                         getValue() == null ? null : new BigDecimal( getValue().toPlainString() ),
+                         getMin() == null ? null : new BigDecimal( getMin().toPlainString() ),
+                         getMax() == null ? null : new BigDecimal( getMax().toPlainString() ),
+                         getUnit() );
+    }
+
+    public String getDesc() {
+        return _desc;
+    }
+
+    public BigDecimal getMax() {
+        return _max;
+    }
+
+    public BigDecimal getMin() {
+        return _min;
+    }
+
+    public String getUnit() {
+        return _unit;
+    }
+
+    public BigDecimal getValue() {
+        return _value;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    public void setDesc( final String desc ) {
+        _desc = desc;
+    }
+
+    public void setMax( final BigDecimal max ) {
+        _max = max;
+    }
+
+    public void setMin( final BigDecimal min ) {
+        _min = min;
+    }
+
+    public void setUnit( final String unit ) {
+        _unit = unit;
+    }
+
+    public void setValue( final BigDecimal value ) {
+        _value = value;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.CLADE_DATE, PhyloXmlMapping.CLADE_DATE_UNIT, getUnit() );
+        if ( !ForesterUtil.isEmpty( getDesc() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.CLADE_DATE_DESC, getDesc(), indentation );
+        }
+        if ( getValue() != null ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.CLADE_DATE_VALUE,
+                                             getValue().toPlainString(),
+                                             indentation );
+        }
+        if ( getMin() != null ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.CLADE_DATE_MIN,
+                                             getMin().toPlainString(),
+                                             indentation );
+        }
+        if ( getMax() != null ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.CLADE_DATE_MAX,
+                                             getMax().toPlainString(),
+                                             indentation );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.CLADE_DATE );
+    }
+
+    @Override
+    public String toString() {
+        return asSimpleText().toString();
+    }
+}
\ No newline at end of file
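
A small sketch of the Date class (the values, unit, and demo class name are illustrative): a dated clade with a value plus a min/max range.

    import java.math.BigDecimal;

    import org.forester.phylogeny.data.Date;

    public class DateDemo {

        public static void main( final String[] args ) {
            final Date d = new Date( "Cretaceous",
                                     new BigDecimal( "100" ),
                                     new BigDecimal( "66" ),
                                     new BigDecimal( "145" ),
                                     "mya" );
            System.out.println( d );          // "Cretaceous [100 mya]"
            System.out.println( d.getMin() ); // 66
        }
    }
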
diff --git a/forester/java/src/org/forester/phylogeny/data/Distribution.java b/forester/java/src/org/forester/phylogeny/data/Distribution.java
new file mode 100644 (file)
index 0000000..9a6ebdb
--- /dev/null
@@ -0,0 +1,182 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class Distribution implements PhylogenyData {
+
+    private final String        _desc;
+    private final List<Point>   _points;
+    private final List<Polygon> _polygons;
+
+    public Distribution( final String desc ) {
+        _desc = desc;
+        _points = null;
+        _polygons = null;
+    }
+
+    public Distribution( final String desc, final List<Point> points ) {
+        _desc = desc;
+        _points = points;
+        _polygons = null;
+    }
+
+    public Distribution( final String desc, final List<Point> points, final List<Polygon> polygons ) {
+        _desc = desc;
+        _points = points;
+        _polygons = polygons;
+    }
+
+    public boolean isEmpty() {
+        return ForesterUtil.isEmpty( _desc ) && ForesterUtil.isEmpty( _points ) && ForesterUtil.isEmpty( _polygons );
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( isEmpty() ) {
+            return sb;
+        }
+        sb.append( "Distribution: " );
+        if ( !ForesterUtil.isEmpty( getDesc() ) ) {
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+            sb.append( " Description: " );
+            sb.append( getDesc() );
+        }
+        int i = 0;
+        if ( getPoints() != null ) {
+            for( final Point point : getPoints() ) {
+                if ( point != null ) {
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                    sb.append( " Point " + i + ": " );
+                    sb.append( point.asSimpleText() );
+                    i++;
+                }
+            }
+        }
+        i = 0;
+        if ( getPolygons() != null ) {
+            for( final Polygon polygon : getPolygons() ) {
+                if ( polygon != null ) {
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                    sb.append( " Polygon " + i + ":" );
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                    sb.append( polygon.asSimpleText() );
+                    i++;
+                }
+            }
+        }
+        return sb;
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        List<Point> new_points = null;
+        List<Polygon> new_polygons = null;
+        if ( getPoints() != null ) {
+            new_points = new ArrayList<Point>();
+            for( final Point point : getPoints() ) {
+                new_points.add( ( Point ) point.copy() );
+            }
+        }
+        if ( getPolygons() != null ) {
+            new_polygons = new ArrayList<Polygon>();
+            for( final Polygon polygon : getPolygons() ) {
+                new_polygons.add( ( Polygon ) polygon.copy() );
+            }
+        }
+        return new Distribution( getDesc(), new_points, new_polygons );
+    }
+
+    public String getDesc() {
+        return _desc;
+    }
+
+    public List<Point> getPoints() {
+        return _points;
+    }
+
+    public List<Polygon> getPolygons() {
+        return _polygons;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isEmpty() ) {
+            return;
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.DISTRIBUTION );
+        if ( !ForesterUtil.isEmpty( getDesc() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.DISTRIBUTION_DESC, getDesc(), indentation );
+        }
+        final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
+        if ( getPoints() != null ) {
+            for( final Point point : getPoints() ) {
+                point.toPhyloXML( writer, level, ind );
+            }
+        }
+        if ( getPolygons() != null ) {
+            for( final Polygon polygon : getPolygons() ) {
+                polygon.toPhyloXML( writer, level, ind );
+            }
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.DISTRIBUTION );
+    }
+
+    @Override
+    public String toString() {
+        return asSimpleText().toString();
+    }
+}
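
A minimal sketch using the description-only constructor of Distribution (the place name and demo class name are illustrative); points and polygons stay null, so only the description is rendered:

    import org.forester.phylogeny.data.Distribution;

    public class DistributionDemo {

        public static void main( final String[] args ) {
            final Distribution dist = new Distribution( "Madagascar" );
            System.out.println( dist.isEmpty() );      // false: the description is set
            System.out.println( dist.asSimpleText() ); // "Distribution: ... Description: Madagascar"
        }
    }
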
diff --git a/forester/java/src/org/forester/phylogeny/data/DomainArchitecture.java b/forester/java/src/org/forester/phylogeny/data/DomainArchitecture.java
new file mode 100644 (file)
index 0000000..e19c2ef
--- /dev/null
@@ -0,0 +1,221 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class DomainArchitecture implements PhylogenyData {
+
+    public final static String               NHX_SEPARATOR = ">";
+    private static final double              INCREASE_KEY  = 0.0001;
+    private SortedMap<Double, ProteinDomain> _domains;
+    private int                              _total_length;
+
+    public DomainArchitecture() {
+        init();
+    }
+
+    public DomainArchitecture( final List<PhylogenyData> domains, final int total_length ) {
+        init();
+        for( final PhylogenyData phylogenyData : domains ) {
+            final ProteinDomain pd = ( ProteinDomain ) phylogenyData;
+            addDomain( pd );
+        }
+        _total_length = total_length;
+    }
+
+    public DomainArchitecture( final String da_str ) {
+        init();
+        int total_length = 0;
+        int to = -1;
+        try {
+            final StringTokenizer st = new StringTokenizer( da_str, DomainArchitecture.NHX_SEPARATOR );
+            final String length_str = ( String ) st.nextElement();
+            total_length = new Integer( length_str ).intValue();
+            while ( st.hasMoreElements() ) {
+                final String from_str = ( String ) st.nextElement();
+                final String to_str = ( String ) st.nextElement();
+                final String support_str = ( String ) st.nextElement();
+                final String name = ( String ) st.nextElement();
+                to = new Integer( to_str ).intValue();
+                final int from = new Integer( from_str ).intValue();
+                final double support = new Double( support_str ).doubleValue();
+                final ProteinDomain pd = new ProteinDomain( name, from, to, support );
+                addDomain( pd );
+            }
+        }
+        catch ( final Exception e ) {
+            throw new IllegalArgumentException( "malformed domain architecture string \"" + da_str + "\": "
+                    + e.getMessage() );
+        }
+        if ( to > total_length ) {
+            throw new IllegalArgumentException( "total length of domain structure is too short" );
+        }
+        _total_length = total_length;
+    }
+
+    public void addDomain( final ProteinDomain pd ) {
+        // domains are keyed by their start position; bump the key by INCREASE_KEY so
+        // domains sharing the same start position do not overwrite each other
+        Double key = new Double( pd.getFrom() );
+        while ( _domains.containsKey( key ) ) {
+            key = new Double( key.doubleValue() + DomainArchitecture.INCREASE_KEY );
+        }
+        _domains.put( key, pd );
+    }
+
+    public StringBuffer asSimpleText() {
+        final StringBuffer sb = new StringBuffer();
+        for( int i = 0; i < getDomains().size(); ++i ) {
+            if ( i > 0 ) {
+                sb.append( "~" );
+            }
+            sb.append( getDomain( i ).asSimpleText() );
+        }
+        return sb;
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        for( int i = 0; i < getDomains().size(); ++i ) {
+            if ( i > 0 ) {
+                sb.append( "~" );
+            }
+            sb.append( getDomain( i ).asText() );
+        }
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        final List<PhylogenyData> domains = new ArrayList<PhylogenyData>( getDomains().size() );
+        for( int i = 0; i < getDomains().size(); ++i ) {
+            domains.add( getDomain( i ).copy() );
+        }
+        return new DomainArchitecture( domains, getTotalLength() );
+    }
+
+    public ProteinDomain getDomain( final int i ) {
+        return ( ProteinDomain ) _domains.values().toArray()[ i ];
+    }
+
+    public SortedMap<Double, ProteinDomain> getDomains() {
+        return _domains;
+    }
+
+    public int getNumberOfDomains() {
+        return _domains.size();
+    }
+
+    public int getTotalLength() {
+        return _total_length;
+    }
+
+    private void init() {
+        _domains = new TreeMap<Double, ProteinDomain>();
+        _total_length = 0;
+    }
+
+    /**
+     * Returns true if the names and the order of the domains match (domain and
+     * linker lengths are ignored).
+     */
+    public boolean isEqual( final PhylogenyData domain_architecture ) {
+        if ( domain_architecture == null ) {
+            return false;
+        }
+        if ( !( domain_architecture instanceof DomainArchitecture ) ) {
+            return false;
+        }
+        final DomainArchitecture d = ( DomainArchitecture ) domain_architecture;
+        if ( getDomains().size() != d.getDomains().size() ) {
+            return false;
+        }
+        for( int i = 0; i < getDomains().size(); ++i ) {
+            if ( !getDomain( i ).getName().equals( d.getDomain( i ).getName() ) ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    public void setTotalLength( final int total_length ) {
+        _total_length = total_length;
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( ":" );
+        sb.append( NHXtags.DOMAIN_STRUCTURE );
+        sb.append( getTotalLength() );
+        if ( getDomains() != null ) {
+            for( int i = 0; i < getDomains().size(); ++i ) {
+                sb.append( DomainArchitecture.NHX_SEPARATOR );
+                sb.append( getDomain( i ).getFrom() );
+                sb.append( DomainArchitecture.NHX_SEPARATOR );
+                sb.append( getDomain( i ).getTo() );
+                sb.append( DomainArchitecture.NHX_SEPARATOR );
+                sb.append( getDomain( i ).getConfidence() );
+                sb.append( DomainArchitecture.NHX_SEPARATOR );
+                sb.append( ForesterUtil.replaceIllegalNhxCharacters( getDomain( i ).getName() ) );
+            }
+        }
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer,
+                                      PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE,
+                                      PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH,
+                                      getTotalLength() + "" );
+        if ( getDomains() != null ) {
+            final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
+            for( int i = 0; i < getDomains().size(); ++i ) {
+                getDomain( i ).toPhyloXML( writer, level, ind );
+            }
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
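
A sketch of the NHX-style string constructor of DomainArchitecture (domain names, coordinates, and demo class name are illustrative): the string is the total length followed by from>to>support>name groups, separated by '>':

    import org.forester.phylogeny.data.DomainArchitecture;

    public class DomainArchitectureDemo {

        public static void main( final String[] args ) {
            final DomainArchitecture da =
                    new DomainArchitecture( "300>10>120>0.9>Kinase>150>280>0.8>SH2" );
            System.out.println( da.getNumberOfDomains() ); // 2
            System.out.println( da.getTotalLength() );     // 300
            System.out.println( da.asSimpleText() );       // the two domains joined by '~'
        }
    }
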
diff --git a/forester/java/src/org/forester/phylogeny/data/Event.java b/forester/java/src/org/forester/phylogeny/data/Event.java
new file mode 100644 (file)
index 0000000..22dacc9
--- /dev/null
@@ -0,0 +1,376 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.StringTokenizer;
+
+import org.forester.io.parsers.nhx.NHXFormatException;
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class Event implements PhylogenyData {
+
+    public final static int     DEFAULT_VALUE = -1;
+    private static final String NHX_SEPARATOR = ">";
+    private int                 _duplications;
+    private int                 _speciations;
+    private int                 _gene_losses;
+    private EventType           _event_type;
+    private Confidence          _confidence;
+
+    public Event() {
+        _duplications = DEFAULT_VALUE;
+        _speciations = DEFAULT_VALUE;
+        _gene_losses = DEFAULT_VALUE;
+        _event_type = EventType.unassigned;
+    }
+
+    public Event( final EventType type ) {
+        _duplications = DEFAULT_VALUE;
+        _speciations = DEFAULT_VALUE;
+        _gene_losses = DEFAULT_VALUE;
+        _event_type = type;
+    }
+
+    public Event( final int duplications, final int speciations, final int gene_losses ) {
+        _duplications = duplications;
+        _speciations = speciations;
+        _gene_losses = gene_losses;
+        _event_type = EventType.mixed;
+    }
+
+    public Event( final int duplications, final int speciations, final int gene_losses, final String type ) {
+        _duplications = duplications;
+        _speciations = speciations;
+        _gene_losses = gene_losses;
+        _event_type = EventType.valueOf( type );
+    }
+
+    public Event( final String nhx ) throws NHXFormatException {
+        if ( ForesterUtil.isEmpty( nhx ) ) {
+            _duplications = DEFAULT_VALUE;
+            _speciations = DEFAULT_VALUE;
+            _gene_losses = DEFAULT_VALUE;
+            _event_type = EventType.unassigned;
+        }
+        else {
+            final StringTokenizer st = new StringTokenizer( nhx, NHX_SEPARATOR );
+            if ( st.countTokens() != 4 ) {
+                throw new NHXFormatException( "malformed NHX format for event [" + nhx + "]" );
+            }
+            final String duplications = ( String ) st.nextElement();
+            final String speciations = ( String ) st.nextElement();
+            final String losses = ( String ) st.nextElement();
+            final String event_type = ( String ) st.nextElement();
+            int d = 0;
+            int s = 0;
+            int l = 0;
+            try {
+                d = Integer.parseInt( duplications );
+                s = Integer.parseInt( speciations );
+                l = Integer.parseInt( losses );
+                _duplications = d;
+                _speciations = s;
+                _gene_losses = l;
+                _event_type = EventType.valueOf( event_type );
+            }
+            catch ( final Exception e ) {
+                throw new NHXFormatException( "malformed NHX format for event [" + nhx + "]:" + e.getMessage() );
+            }
+        }
+    }
+
+    public StringBuffer asSimpleText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( isUnassigned() ) {
+            // an unassigned event yields an empty string
+        }
+        else if ( isSpeciationOrDuplication() ) {
+            sb.append( "?" );
+        }
+        else if ( isOther() || isRoot() || isTransfer() || isFusion() ) {
+            sb.append( getEventType().toString() );
+        }
+        else {
+            if ( getNumberOfDuplications() > 0 ) {
+                if ( getNumberOfDuplications() > 1 ) {
+                    sb.append( getNumberOfDuplications() );
+                }
+                sb.append( "D" );
+            }
+            if ( getNumberOfSpeciations() > 0 ) {
+                if ( getNumberOfSpeciations() > 1 ) {
+                    sb.append( getNumberOfSpeciations() );
+                }
+                sb.append( "S" );
+            }
+            if ( getNumberOfGeneLosses() > 0 ) {
+                if ( getNumberOfGeneLosses() > 1 ) {
+                    sb.append( getNumberOfGeneLosses() );
+                }
+                sb.append( "L" );
+            }
+        }
+        return sb;
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( isUnassigned() || isSpeciationOrDuplication() || isOther() || isRoot() || isTransfer() || isFusion() ) {
+            sb.append( getEventType().toString() );
+        }
+        else {
+            if ( isDuplication() ) {
+                if ( getNumberOfDuplications() == 1 ) {
+                    sb.append( "duplication" );
+                }
+                else {
+                    sb.append( "duplications [" + getNumberOfDuplications() + "]" );
+                }
+            }
+            else if ( isSpeciation() ) {
+                if ( getNumberOfSpeciations() == 1 ) {
+                    sb.append( "speciation" );
+                }
+                else {
+                    sb.append( "speciations [" + getNumberOfSpeciations() + "]" );
+                }
+            }
+            else if ( isGeneLoss() ) {
+                if ( getNumberOfGeneLosses() == 1 ) {
+                    sb.append( "gene-loss" );
+                }
+                else {
+                    sb.append( "gene-losses [" + getNumberOfGeneLosses() + "]" );
+                }
+            }
+            else {
+                sb.append( "duplications [" + getNumberOfDuplications() + "] " );
+                sb.append( "speciations [" + getNumberOfSpeciations() + "] " );
+                sb.append( "gene-losses [" + getNumberOfGeneLosses() + "]" );
+            }
+        }
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        if ( isUnassigned() ) {
+            return new Event();
+        }
+        else if ( _event_type != EventType.mixed ) {
+            return new Event( _event_type );
+        }
+        else {
+            return new Event( _duplications, _speciations, _gene_losses );
+        }
+    }
+
+    public Confidence getConfidence() {
+        return _confidence;
+    }
+
+    public EventType getEventType() {
+        return _event_type;
+    }
+
+    public int getNumberOfDuplications() {
+        return _duplications;
+    }
+
+    public int getNumberOfGeneLosses() {
+        return _gene_losses;
+    }
+
+    public int getNumberOfSpeciations() {
+        return _speciations;
+    }
+
+    /**
+     * Returns true if this event contains one or more duplication events only
+     * 
+     * @return true if this event contains one or more duplication events only
+     */
+    public boolean isDuplication() {
+        return ( _duplications > 0 ) && ( _gene_losses < 1 ) && ( _speciations < 1 );
+    }
+
+    public boolean isEqual( final PhylogenyData event ) {
+        if ( ( event == null ) || !( event instanceof Event ) ) {
+            return false;
+        }
+        final Event e = ( Event ) event;
+        if ( getEventType().compareTo( e.getEventType() ) != 0 ) {
+            return false;
+        }
+        if ( getNumberOfDuplications() != e.getNumberOfDuplications() ) {
+            return false;
+        }
+        if ( getNumberOfSpeciations() != e.getNumberOfSpeciations() ) {
+            return false;
+        }
+        if ( getNumberOfGeneLosses() != e.getNumberOfGeneLosses() ) {
+            return false;
+        }
+        return true;
+    }
+
+    public boolean isFusion() {
+        return _event_type == EventType.fusion;
+    }
+
+    /**
+     * Returns true if this event contains one or more gene loss events only
+     * 
+     * @return true if this event contains one or more gene loss events only
+     */
+    public boolean isGeneLoss() {
+        return ( _duplications < 1 ) && ( _gene_losses > 0 ) && ( _speciations < 1 );
+    }
+
+    public boolean isOther() {
+        return _event_type == EventType.other;
+    }
+
+    public boolean isRoot() {
+        return _event_type == EventType.root;
+    }
+
+    /**
+     * Returns true if this event contains one or more speciation events only
+     * 
+     * @return true if this event contains one or more speciation events only
+     */
+    public boolean isSpeciation() {
+        return ( _duplications < 1 ) && ( _gene_losses < 1 ) && ( _speciations > 0 );
+    }
+
+    public boolean isSpeciationOrDuplication() {
+        return _event_type == EventType.speciation_or_duplication;
+    }
+
+    public boolean isTransfer() {
+        return _event_type == EventType.transfer;
+    }
+
+    public boolean isUnassigned() {
+        return ( _duplications == DEFAULT_VALUE ) && ( _event_type == EventType.unassigned );
+    }
+
+    public void setConfidence( final Confidence confidence ) {
+        _confidence = confidence;
+    }
+
+    public void setDuplications( final int duplications ) {
+        _duplications = duplications;
+        _event_type = EventType.mixed;
+    }
+
+    public void setGeneLosses( final int gene_losses ) {
+        _gene_losses = gene_losses;
+        _event_type = EventType.mixed;
+    }
+
+    public void setSpeciations( final int speciations ) {
+        _speciations = speciations;
+        _event_type = EventType.mixed;
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        if ( !isUnassigned() && ( isSpeciationOrDuplication() || isDuplication() || isSpeciation() ) ) {
+            sb.append( ":" );
+            sb.append( NHXtags.IS_DUPLICATION );
+            if ( isSpeciationOrDuplication() ) {
+                sb.append( "?" );
+            }
+            else if ( isDuplication() ) {
+                sb.append( "Y" );
+            }
+            else if ( isSpeciation() ) {
+                sb.append( "N" );
+            }
+        }
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.EVENTS );
+        if ( ( getEventType() != EventType.unassigned ) && ( getEventType() != EventType.mixed ) ) {
+            PhylogenyDataUtil
+                    .appendElement( writer, PhyloXmlMapping.EVENT_TYPE, getEventType().toString(), indentation );
+        }
+        if ( getNumberOfDuplications() > 0 ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.EVENT_DUPLICATIONS,
+                                             getNumberOfDuplications() + "",
+                                             indentation );
+        }
+        if ( getNumberOfSpeciations() > 0 ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.EVENT_SPECIATIONS,
+                                             getNumberOfSpeciations() + "",
+                                             indentation );
+        }
+        if ( getNumberOfGeneLosses() > 0 ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.EVENT_LOSSES,
+                                             getNumberOfGeneLosses() + "",
+                                             indentation );
+        }
+        if ( getConfidence() != null ) {
+            getConfidence().toPhyloXML( writer, level, indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.EVENTS );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+
+    public static Event createSingleDuplicationEvent() {
+        return new Event( 1, 0, 0 );
+    }
+
+    public static Event createSingleSpeciationEvent() {
+        return new Event( 0, 1, 0 );
+    }
+
+    public static Event createSingleSpeciationOrDuplicationEvent() {
+        return new Event( EventType.speciation_or_duplication );
+    }
+
+    public static enum EventType {
+        transfer, fusion, root, speciation_or_duplication, other, mixed, unassigned
+    }
+}
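
A short sketch of Event (sample values and demo class name are illustrative). The factory methods cover the common single-event cases; the string constructor parses the duplications>speciations>losses>type form used for NHX:

    import org.forester.phylogeny.data.Event;

    public class EventDemo {

        public static void main( final String[] args ) throws Exception {
            final Event dup = Event.createSingleDuplicationEvent();
            System.out.println( dup.isDuplication() ); // true
            System.out.println( dup.asSimpleText() );  // "D"
            System.out.println( dup.toNHX() );         // NHX duplication flag set to "Y"
            final Event mixed = new Event( "2>1>0>mixed" ); // throws NHXFormatException if malformed
            System.out.println( mixed.getNumberOfDuplications() ); // 2
        }
    }
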
diff --git a/forester/java/src/org/forester/phylogeny/data/Identifier.java b/forester/java/src/org/forester/phylogeny/data/Identifier.java
new file mode 100644 (file)
index 0000000..33c3a8b
--- /dev/null
@@ -0,0 +1,147 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class Identifier implements PhylogenyData {
+
+    final String _value;
+    final String _provider;
+
+    public Identifier() {
+        _value = "";
+        _provider = "";
+    }
+
+    public Identifier( final String value ) {
+        _value = value;
+        _provider = "";
+    }
+
+    public Identifier( final String value, final String provider ) {
+        _value = value;
+        _provider = provider;
+    }
+
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getValue() );
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getProvider() ) ) {
+            sb.append( "[" );
+            sb.append( getProvider() );
+            sb.append( "] " );
+        }
+        sb.append( getValue() );
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        return new Identifier( getValue(), getProvider() );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            return false;
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return isEqual( ( Identifier ) o );
+        }
+    }
+
+    public String getProvider() {
+        return _provider;
+    }
+
+    public String getValue() {
+        return _value;
+    }
+
+    @Override
+    public int hashCode() {
+        if ( getProvider() != null ) {
+            return ( getProvider() + getValue() ).hashCode();
+        }
+        return getValue().hashCode();
+    }
+
+    public boolean isEqual( final PhylogenyData data ) {
+        if ( this == data ) {
+            return true;
+        }
+        if ( ( data == null ) || ( getValue() == null ) ) {
+            return false;
+        }
+        final Identifier a = ( Identifier ) data;
+        if ( ( getProvider() != null ) && ( a.getProvider() != null ) ) {
+            return ( a.getValue().equals( getValue() ) && a.getProvider().equals( getProvider() ) );
+        }
+        return ( a.getValue().equals( getValue() ) );
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( ":" );
+        sb.append( NHXtags.NODE_IDENTIFIER );
+        sb.append( ForesterUtil.replaceIllegalNhxCharacters( getValue() ) );
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( !org.forester.util.ForesterUtil.isEmpty( getProvider() ) ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.IDENTIFIER,
+                                             getValue(),
+                                             PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR,
+                                             getProvider(),
+                                             indentation );
+        }
+        else {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.IDENTIFIER, getValue(), indentation );
+        }
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
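
A sketch of Identifier equality (values and demo class name illustrative): when both identifiers carry a provider, value and provider must both match; note that the default provider is the empty string, not null:

    import org.forester.phylogeny.data.Identifier;

    public class IdentifierDemo {

        public static void main( final String[] args ) {
            final Identifier ncbi = new Identifier( "9606", "ncbi" );
            final Identifier bare = new Identifier( "9606" );
            System.out.println( ncbi.asText() );                                    // "[ncbi] 9606"
            System.out.println( ncbi.isEqual( new Identifier( "9606", "ncbi" ) ) ); // true
            System.out.println( ncbi.isEqual( bare ) );                             // false: providers differ
        }
    }
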
diff --git a/forester/java/src/org/forester/phylogeny/data/MultipleUris.java b/forester/java/src/org/forester/phylogeny/data/MultipleUris.java
new file mode 100644 (file)
index 0000000..cce4be5
--- /dev/null
@@ -0,0 +1,39 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.util.List;
+
+public interface MultipleUris {
+
+    public List<Uri> getUris();
+
+    public void setUris( final List<Uri> uris );
+
+    public Uri getUri( final int index );
+
+    public void addUri( final Uri uri );
+}
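
MultipleUris is only an interface; below is a minimal sketch of an implementing class, reusing the lazy-list initialization pattern of the add* methods in NodeData further down in this commit. The class name UriHolder is hypothetical; the Uri type referenced by the interface lives in the same package.

import java.util.ArrayList;
import java.util.List;

import org.forester.phylogeny.data.MultipleUris;
import org.forester.phylogeny.data.Uri;

public class UriHolder implements MultipleUris {

    private List<Uri> _uris;

    @Override
    public void addUri( final Uri uri ) {
        if ( _uris == null ) {            // create the list only when it is first needed
            _uris = new ArrayList<Uri>();
        }
        _uris.add( uri );
    }

    @Override
    public Uri getUri( final int index ) {
        return _uris.get( index );
    }

    @Override
    public List<Uri> getUris() {
        return _uris;
    }

    @Override
    public void setUris( final List<Uri> uris ) {
        _uris = uris;
    }
}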
diff --git a/forester/java/src/org/forester/phylogeny/data/NodeData.java b/forester/java/src/org/forester/phylogeny/data/NodeData.java
new file mode 100644 (file)
index 0000000..d1c75e2
--- /dev/null
@@ -0,0 +1,524 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.phylogeny.data.Property.AppliesTo;
+import org.forester.util.ForesterUtil;
+
+public class NodeData implements PhylogenyData {
+
+    private String             _node_name;
+    private Event              _event;
+    private List<Sequence>     _sequences;
+    private Identifier         _node_identifier;
+    private List<Taxonomy>     _taxonomies;
+    private List<Distribution> _distributions;
+    private Date               _date;
+    private BinaryCharacters   _binary_characters;
+    private PropertiesMap      _properties;
+    private List<Reference>    _references;
+    private List<Double>       _vector;
+
+    public NodeData() {
+        init();
+    }
+
+    private void init() {
+        _node_name = "";
+    }
+
+    public void addDistribution( final Distribution distribution ) {
+        if ( _distributions == null ) {
+            _distributions = new ArrayList<Distribution>();
+        }
+        _distributions.add( distribution );
+    }
+
+    public void addReference( final Reference reference ) {
+        if ( _references == null ) {
+            _references = new ArrayList<Reference>();
+        }
+        _references.add( reference );
+    }
+
+    public void addSequence( final Sequence sequence ) {
+        if ( _sequences == null ) {
+            _sequences = new ArrayList<Sequence>();
+        }
+        _sequences.add( sequence );
+    }
+
+    public void addTaxonomy( final Taxonomy taxonomy ) {
+        if ( _taxonomies == null ) {
+            _taxonomies = new ArrayList<Taxonomy>();
+        }
+        _taxonomies.add( taxonomy );
+    }
+
+    public StringBuffer asSimpleText() {
+        throw new UnsupportedOperationException();
+    }
+
+    public StringBuffer asText() {
+        throw new UnsupportedOperationException();
+    }
+
+    public PhylogenyData copy() {
+        final NodeData new_data = new NodeData();
+        new_data.setNodeName( getNodeName() );
+        if ( ( getSequences() != null ) && ( getSequences().size() > 0 ) ) {
+            new_data.setSequences( new ArrayList<Sequence>() );
+            for( final Sequence s : getSequences() ) {
+                if ( s != null ) {
+                    new_data.addSequence( ( Sequence ) s.copy() );
+                }
+            }
+        }
+        if ( isHasEvent() ) {
+            new_data.setEvent( ( Event ) getEvent().copy() );
+        }
+        if ( isHasNodeIdentifier() ) {
+            new_data.setNodeIdentifier( ( Identifier ) getNodeIdentifier().copy() );
+        }
+        if ( ( getTaxonomies() != null ) && ( getTaxonomies().size() > 0 ) ) {
+            new_data.setTaxonomies( new ArrayList<Taxonomy>() );
+            for( final Taxonomy t : getTaxonomies() ) {
+                if ( t != null ) {
+                    new_data.addTaxonomy( ( Taxonomy ) t.copy() );
+                }
+            }
+        }
+        if ( isHasBinaryCharacters() ) {
+            new_data.setBinaryCharacters( ( BinaryCharacters ) getBinaryCharacters().copy() );
+        }
+        if ( ( getReferences() != null ) && ( getReferences().size() > 0 ) ) {
+            new_data.setReferences( new ArrayList<Reference>() );
+            for( final Reference r : getReferences() ) {
+                if ( r != null ) {
+                    new_data.addReference( ( Reference ) r.copy() );
+                }
+            }
+        }
+        if ( ( getDistributions() != null ) && ( getDistributions().size() > 0 ) ) {
+            new_data.setDistributions( new ArrayList<Distribution>() );
+            for( final Distribution d : getDistributions() ) {
+                if ( d != null ) {
+                    new_data.addDistribution( ( Distribution ) d.copy() );
+                }
+            }
+        }
+        if ( isHasDate() ) {
+            new_data.setDate( ( Date ) getDate().copy() );
+        }
+        if ( isHasProperties() ) {
+            new_data.setProperties( ( PropertiesMap ) getProperties().copy() );
+        }
+        return new_data;
+    }
+
+    public BinaryCharacters getBinaryCharacters() {
+        return _binary_characters;
+    }
+
+    public Date getDate() {
+        return _date;
+    }
+
+    /**
+     * Convenience method -- always returns the first Distribution.
+     *  
+     * @return Distribution
+     */
+    public Distribution getDistribution() {
+        return getDistribution( 0 );
+    }
+
+    public Distribution getDistribution( final int index ) {
+        return _distributions.get( index );
+    }
+
+    public List<Distribution> getDistributions() {
+        return _distributions;
+    }
+
+    public Event getEvent() {
+        return _event;
+    }
+
+    public Identifier getNodeIdentifier() {
+        return _node_identifier;
+    }
+
+    public PropertiesMap getProperties() {
+        return _properties;
+    }
+
+    /**
+     * Convenience method -- always returns the first Reference.
+     * 
+     *  @return Reference
+     *  
+     */
+    public Reference getReference() {
+        return getReference( 0 );
+    }
+
+    public Reference getReference( final int index ) {
+        return _references.get( index );
+    }
+
+    public List<Reference> getReferences() {
+        return _references;
+    }
+
+    /**
+     * Convenience method -- always returns the first Sequence.
+     * 
+     * @return Sequence
+     */
+    public Sequence getSequence() {
+        return getSequence( 0 );
+    }
+
+    public Sequence getSequence( final int index ) {
+        return _sequences.get( index );
+    }
+
+    public List<Sequence> getSequences() {
+        return _sequences;
+    }
+
+    public List<Taxonomy> getTaxonomies() {
+        return _taxonomies;
+    }
+
+    /**
+     * Convenience method -- always returns the first Taxonomy.
+     * 
+     * @return  Taxonomy
+     * 
+     */
+    public Taxonomy getTaxonomy() {
+        return getTaxonomy( 0 );
+    }
+
+    public Taxonomy getTaxonomy( final int index ) {
+        return _taxonomies.get( index );
+    }
+
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    public boolean isHasBinaryCharacters() {
+        return getBinaryCharacters() != null;
+    }
+
+    public boolean isHasDate() {
+        return ( getDate() != null )
+                && ( !ForesterUtil.isEmpty( getDate().getDesc() ) || !ForesterUtil.isNull( getDate().getMax() )
+                        || !ForesterUtil.isNull( getDate().getMin() ) || !ForesterUtil.isNull( getDate().getValue() ) || !ForesterUtil
+                        .isEmpty( getDate().getUnit() ) );
+    }
+
+    public boolean isHasDistribution() {
+        return ( ( ( getDistributions() != null ) && ( getDistributions().size() > 0 ) ) && ( ( !ForesterUtil
+                .isEmpty( getDistribution().getDesc() ) )
+                || ( ( getDistribution().getPoints() != null ) && ( getDistribution().getPoints().size() > 0 ) ) || ( ( getDistribution()
+                .getPolygons() != null ) && ( getDistribution().getPolygons().size() > 0 ) ) ) );
+    }
+
+    public boolean isHasEvent() {
+        return getEvent() != null;
+    }
+
+    public boolean isHasNodeIdentifier() {
+        return getNodeIdentifier() != null;
+    }
+
+    public boolean isHasProperties() {
+        return getProperties() != null;
+    }
+
+    public boolean isHasReference() {
+        return ( ( getReferences() != null ) && ( getReferences().size() > 0 ) )
+                && ( !ForesterUtil.isEmpty( getReference().getDoi() ) || !ForesterUtil.isEmpty( getReference()
+                        .getDescription() ) );
+    }
+
+    public boolean isHasSequence() {
+        return ( getSequences() != null ) && ( getSequences().size() > 0 ) && ( getSequences().get( 0 ) != null );
+    }
+
+    public boolean isHasTaxonomy() {
+        return ( getTaxonomies() != null ) && ( getTaxonomies().size() > 0 ) && ( getTaxonomies().get( 0 ) != null );
+    }
+
+    public void setBinaryCharacters( final BinaryCharacters binary_characters ) {
+        _binary_characters = binary_characters;
+    }
+
+    public void setDate( final Date date ) {
+        _date = date;
+    }
+
+    /**
+     * Convenience method -- always sets the first Distribution.
+     * 
+     */
+    public void setDistribution( final Distribution distribution ) {
+        if ( _distributions == null ) {
+            _distributions = new ArrayList<Distribution>();
+        }
+        if ( _distributions.size() == 0 ) {
+            _distributions.add( distribution );
+        }
+        else {
+            _distributions.set( 0, distribution );
+        }
+    }
+
+    public void setDistribution( final int index, final Distribution distribution ) {
+        if ( _distributions == null ) {
+            _distributions = new ArrayList<Distribution>();
+        }
+        _distributions.set( index, distribution );
+    }
+
+    private void setDistributions( final List<Distribution> distributions ) {
+        _distributions = distributions;
+    }
+
+    public void setEvent( final Event event ) {
+        _event = event;
+    }
+
+    public void setNodeIdentifier( final Identifier node_identifier ) {
+        _node_identifier = node_identifier;
+    }
+
+    public void setProperties( final PropertiesMap custom_data ) {
+        _properties = custom_data;
+    }
+
+    public void setReference( final int index, final Reference reference ) {
+        if ( _references == null ) {
+            _references = new ArrayList<Reference>();
+        }
+        _references.set( index, reference );
+    }
+
+    /**
+     * Convenience method -- always sets the first Reference.
+     * 
+     */
+    public void setReference( final Reference reference ) {
+        if ( _references == null ) {
+            _references = new ArrayList<Reference>();
+        }
+        if ( _references.size() == 0 ) {
+            _references.add( reference );
+        }
+        else {
+            _references.set( 0, reference );
+        }
+    }
+
+    private void setReferences( final List<Reference> references ) {
+        _references = references;
+    }
+
+    public void setSequence( final int index, final Sequence sequence ) {
+        if ( _sequences == null ) {
+            _sequences = new ArrayList<Sequence>();
+        }
+        _sequences.set( index, sequence );
+    }
+
+    /**
+     * Convenience method -- always sets the first Sequence.
+     * 
+     */
+    public void setSequence( final Sequence sequence ) {
+        if ( _sequences == null ) {
+            _sequences = new ArrayList<Sequence>();
+        }
+        if ( _sequences.size() == 0 ) {
+            _sequences.add( sequence );
+        }
+        else {
+            _sequences.set( 0, sequence );
+        }
+    }
+
+    private void setSequences( final List<Sequence> sequences ) {
+        _sequences = sequences;
+    }
+
+    private void setTaxonomies( final List<Taxonomy> taxonomies ) {
+        _taxonomies = taxonomies;
+    }
+
+    public void setTaxonomy( final int index, final Taxonomy taxonomy ) {
+        if ( _taxonomies == null ) {
+            _taxonomies = new ArrayList<Taxonomy>();
+        }
+        _taxonomies.set( index, taxonomy );
+    }
+
+    /**
+     * Convenience method -- always sets the first Taxonomy.
+     * 
+     */
+    public void setTaxonomy( final Taxonomy taxonomy ) {
+        if ( _taxonomies == null ) {
+            _taxonomies = new ArrayList<Taxonomy>();
+        }
+        if ( _taxonomies.size() == 0 ) {
+            _taxonomies.add( taxonomy );
+        }
+        else {
+            _taxonomies.set( 0, taxonomy );
+        }
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        if ( isHasNodeIdentifier() ) {
+            sb.append( getNodeIdentifier().toNHX() );
+        }
+        if ( isHasTaxonomy() ) {
+            sb.append( getTaxonomy().toNHX() );
+        }
+        if ( isHasSequence() ) {
+            sb.append( getSequence().toNHX() );
+        }
+        if ( isHasEvent() ) {
+            sb.append( getEvent().toNHX() );
+        }
+        if ( isHasProperties() ) {
+            sb.append( getProperties().toNHX() );
+        }
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isHasNodeIdentifier() ) {
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( indentation );
+            //  if ( !org.forester.util.ForesterUtil.isEmpty( getNodeIdentifier().getProvider() ) ) {
+            //     PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.NODE_IDENTIFIER, getNodeIdentifier()
+            //             .getValue(), PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR, getNodeIdentifier().getProvider() );
+            // }
+            // else {
+            //     PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.NODE_IDENTIFIER, getNodeIdentifier()
+            //             .getValue() );
+            // }
+        }
+        if ( isHasTaxonomy() ) {
+            for( final Taxonomy t : getTaxonomies() ) {
+                if ( !t.isEmpty() ) {
+                    t.toPhyloXML( writer, level, indentation );
+                }
+            }
+        }
+        if ( isHasSequence() ) {
+            for( final Sequence s : getSequences() ) {
+                if ( !s.isEmpty() ) {
+                    s.toPhyloXML( writer, level, indentation );
+                }
+            }
+        }
+        if ( isHasEvent() ) {
+            getEvent().toPhyloXML( writer, level, indentation );
+        }
+        if ( isHasBinaryCharacters() ) {
+            getBinaryCharacters().toPhyloXML( writer, level, indentation );
+        }
+        if ( isHasDistribution() ) {
+            for( final Distribution d : getDistributions() ) {
+                d.toPhyloXML( writer, level, indentation );
+            }
+        }
+        if ( isHasDate() ) {
+            getDate().toPhyloXML( writer, level, indentation );
+        }
+        if ( isHasReference() ) {
+            for( final Reference r : getReferences() ) {
+                r.toPhyloXML( writer, level, indentation );
+            }
+        }
+        if ( isHasProperties() ) {
+            getProperties().toPhyloXML( writer, level, indentation.substring( 0, indentation.length() - 2 ) );
+        }
+        if ( ( getVector() != null )
+                && !getVector().isEmpty()
+                && ( ( getProperties() == null ) || getProperties()
+                        .getPropertiesWithGivenReferencePrefix( PhyloXmlUtil.VECTOR_PROPERTY_REF ).isEmpty() ) ) {
+            final List<Property> ps = vectorToProperties( getVector() );
+            final String my_indent = indentation.substring( 0, indentation.length() - 2 );
+            for( final Property p : ps ) {
+                p.toPhyloXML( writer, level, my_indent );
+            }
+        }
+    }
+
+    private List<Property> vectorToProperties( final List<Double> vector ) {
+        final List<Property> properties = new ArrayList<Property>();
+        for( int i = 0; i < vector.size(); ++i ) {
+            properties.add( new Property( PhyloXmlUtil.VECTOR_PROPERTY_REF + i,
+                                          String.valueOf( vector.get( i ) ),
+                                          "",
+                                          PhyloXmlUtil.VECTOR_PROPERTY_TYPE,
+                                          AppliesTo.NODE ) );
+        }
+        return properties;
+    }
+
+    public void setVector( final List<Double> vector ) {
+        _vector = vector;
+    }
+
+    public List<Double> getVector() {
+        return _vector;
+    }
+
+    public String getNodeName() {
+        return _node_name;
+    }
+
+    public void setNodeName( final String node_name ) {
+        _node_name = node_name;
+    }
+}
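
A minimal sketch of populating a NodeData instance, using only the setters and predicates defined above; the class name and the example values are illustrative.

import java.util.Arrays;

import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.NodeData;

public class NodeDataSketch {

    public static void main( final String[] args ) {
        final NodeData data = new NodeData();
        data.setNodeName( "node_1" );                                // example name
        data.setNodeIdentifier( new Identifier( "42", "example" ) ); // example identifier
        data.setVector( Arrays.asList( 0.1, 0.2, 0.3 ) );
        System.out.println( data.isHasNodeIdentifier() );            // true
        final NodeData copy = ( NodeData ) data.copy();              // copies the name and the optional
                                                                     // sub-objects that are set (not the vector)
        System.out.println( copy.getNodeName() );                    // prints "node_1"
    }
}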
diff --git a/forester/java/src/org/forester/phylogeny/data/PhylogenyData.java b/forester/java/src/org/forester/phylogeny/data/PhylogenyData.java
new file mode 100644 (file)
index 0000000..d5be4d7
--- /dev/null
@@ -0,0 +1,72 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+/*
+ * Interface for data for annotating a Phylogeny.
+ */
+public interface PhylogenyData {
+
+    public StringBuffer asSimpleText();
+
+    public StringBuffer asText();
+
+    /**
+     * Creates a new PhylogenyData object with values identical to those of
+     * this PhylogenyData.
+     * This is intended to return a deep copy, but not all implementations
+     * provide one yet.
+     * 
+     * @return a copy (intended to be deep) of this PhylogenyData
+     */
+    public PhylogenyData copy();
+
+    /**
+     * Compares this PhylogenyData to the PhylogenyData data. In general, this
+     * should return true if and only if all fields are exactly identical.
+     * 
+     * @param data
+     *            the PhylogenyData to compare to
+     * @return in general, true if and only if all fields are exactly
+     *         identical, false otherwise
+     */
+    public boolean isEqual( final PhylogenyData data );
+
+    public StringBuffer toNHX();
+
+    /**
+     *  Writes a phyloXML representation of this phylogeny data.
+     * 
+     * @param writer
+     * @param level
+     * @param indentation
+     * @throws IOException 
+     */
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException;
+}
\ No newline at end of file
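
A minimal sketch of a custom PhylogenyData implementation, assuming only the six interface methods above plus PhylogenyDataUtil.appendElement from this commit. The class name PlainNote and the element name "note" are illustrative and not part of the phyloXML mapping.

import java.io.IOException;
import java.io.Writer;

import org.forester.phylogeny.data.PhylogenyData;
import org.forester.phylogeny.data.PhylogenyDataUtil;

public class PlainNote implements PhylogenyData {

    private final String _text;

    public PlainNote( final String text ) {
        _text = text;
    }

    @Override
    public StringBuffer asSimpleText() {
        return new StringBuffer( _text );
    }

    @Override
    public StringBuffer asText() {
        return asSimpleText();
    }

    @Override
    public PhylogenyData copy() {
        return new PlainNote( _text );             // String is immutable, so this copy is effectively deep
    }

    @Override
    public boolean isEqual( final PhylogenyData data ) {
        return ( data instanceof PlainNote ) && ( ( PlainNote ) data )._text.equals( _text );
    }

    @Override
    public StringBuffer toNHX() {
        throw new UnsupportedOperationException(); // several bundled implementations do the same
    }

    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        PhylogenyDataUtil.appendElement( writer, "note", _text, indentation ); // "note" is an illustrative element name
    }
}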
diff --git a/forester/java/src/org/forester/phylogeny/data/PhylogenyDataUtil.java b/forester/java/src/org/forester/phylogeny/data/PhylogenyDataUtil.java
new file mode 100644 (file)
index 0000000..a90181d
--- /dev/null
@@ -0,0 +1,372 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.awt.Graphics;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public final class PhylogenyDataUtil {
+
+    public static void appendClose( final Writer w, final String element_name ) throws IOException {
+        w.write( "</" );
+        w.write( element_name );
+        w.write( ">" );
+    }
+
+    public static void appendElement( final Writer w, final String element_name, final String value )
+            throws IOException {
+        appendOpen( w, element_name );
+        w.write( replaceIllegalXmlCharacters( value ) );
+        appendClose( w, element_name );
+    }
+
+    public static void appendElement( final Writer w,
+                                      final String element_name,
+                                      final String value,
+                                      final String indentation ) throws IOException {
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( indentation );
+        w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+        // The XML character escaping performed in appendElement( Writer, String, String ) needs to be done in a more systematic manner.
+        appendElement( w, element_name, value );
+    }
+
+    public static void appendElement( final Writer w,
+                                      final String element_name,
+                                      final String value,
+                                      final String attribute_name,
+                                      final String attribute_value ) throws IOException {
+        appendOpen( w, element_name, attribute_name, attribute_value );
+        w.write( replaceIllegalXmlCharacters( value ) );
+        appendClose( w, element_name );
+    }
+
+    public static void appendElement( final Writer w,
+                                      final String element_name,
+                                      final String value,
+                                      final String attribute_name,
+                                      final String attribute_value,
+                                      final String indentation ) throws IOException {
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( indentation );
+        w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+        appendOpen( w, element_name, attribute_name, attribute_value );
+        w.write( replaceIllegalXmlCharacters( value ) );
+        appendClose( w, element_name );
+    }
+
+    public static void appendElement( final Writer w,
+                                      final String element_name,
+                                      final String value,
+                                      final String attribute1_name,
+                                      final String attribute1_value,
+                                      final String attribute2_name,
+                                      final String attribute2_value,
+                                      final String indentation ) throws IOException {
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( indentation );
+        w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+        appendOpen( w, element_name, attribute1_name, attribute1_value, attribute2_name, attribute2_value );
+        w.write( replaceIllegalXmlCharacters( value ) );
+        appendClose( w, element_name );
+    }
+
+    public static void appendElement( final Writer w,
+                                      final String element_name,
+                                      final String attribute1_name,
+                                      final String attribute1_value,
+                                      final String attribute2_name,
+                                      final String attribute2_value,
+                                      final String attribute3_name,
+                                      final String attribute3_value,
+                                      final String attribute4_name,
+                                      final String attribute4_value,
+                                      final String indentation ) throws IOException {
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( indentation );
+        appendOpen( w,
+                    element_name,
+                    attribute1_name,
+                    attribute1_value,
+                    attribute2_name,
+                    attribute2_value,
+                    attribute3_name,
+                    attribute3_value,
+                    attribute4_name,
+                    attribute4_value );
+        appendClose( w, element_name );
+    }
+
+    public static void appendElement( final Writer w,
+                                      final String element_name,
+                                      final String value,
+                                      final String attribute1_name,
+                                      final String attribute1_value,
+                                      final String attribute2_name,
+                                      final String attribute2_value,
+                                      final String attribute3_name,
+                                      final String attribute3_value,
+                                      final String attribute4_name,
+                                      final String attribute4_value,
+                                      final String attribute5_name,
+                                      final String attribute5_value,
+                                      final String indentation ) throws IOException {
+        w.write( ForesterUtil.LINE_SEPARATOR );
+        w.write( indentation );
+        w.write( PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+        appendOpen( w,
+                    element_name,
+                    attribute1_name,
+                    attribute1_value,
+                    attribute2_name,
+                    attribute2_value,
+                    attribute3_name,
+                    attribute3_value,
+                    attribute4_name,
+                    attribute4_value,
+                    attribute5_name,
+                    attribute5_value );
+        w.write( replaceIllegalXmlCharacters( value ) );
+        appendClose( w, element_name );
+    }
+
+    public static void appendOpen( final Writer w, final String element_name ) throws IOException {
+        w.write( "<" );
+        w.write( element_name );
+        w.write( ">" );
+    }
+
+    public static void appendOpen( final Writer w,
+                                   final String element_name,
+                                   final String attribute_name,
+                                   final String attribute_value ) throws IOException {
+        w.write( "<" );
+        w.write( element_name );
+        if ( !ForesterUtil.isEmpty( attribute_value ) ) {
+            w.write( " " );
+            w.write( attribute_name );
+            w.write( "=\"" );
+            w.write( attribute_value );
+            w.write( "\"" );
+        }
+        w.write( ">" );
+    }
+
+    public static void appendOpen( final Writer w,
+                                   final String element_name,
+                                   final String attribute1_name,
+                                   final String attribute1_value,
+                                   final String attribute2_name,
+                                   final String attribute2_value ) throws IOException {
+        w.write( "<" );
+        w.write( element_name );
+        if ( !ForesterUtil.isEmpty( attribute1_value ) ) {
+            w.write( " " );
+            w.write( attribute1_name );
+            w.write( "=\"" );
+            w.write( attribute1_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute2_value ) ) {
+            w.write( " " );
+            w.write( attribute2_name );
+            w.write( "=\"" );
+            w.write( attribute2_value );
+            w.write( "\"" );
+        }
+        w.write( ">" );
+    }
+
+    public static void appendOpen( final Writer w,
+                                   final String element_name,
+                                   final String attribute1_name,
+                                   final String attribute1_value,
+                                   final String attribute2_name,
+                                   final String attribute2_value,
+                                   final String attribute3_name,
+                                   final String attribute3_value ) throws IOException {
+        w.write( "<" );
+        w.write( element_name );
+        if ( !ForesterUtil.isEmpty( attribute1_value ) ) {
+            w.write( " " );
+            w.write( attribute1_name );
+            w.write( "=\"" );
+            w.write( attribute1_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute2_value ) ) {
+            w.write( " " );
+            w.write( attribute2_name );
+            w.write( "=\"" );
+            w.write( attribute2_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute3_value ) ) {
+            w.write( " " );
+            w.write( attribute3_name );
+            w.write( "=\"" );
+            w.write( attribute3_value );
+            w.write( "\"" );
+        }
+        w.write( ">" );
+    }
+
+    public static void appendOpen( final Writer w,
+                                   final String element_name,
+                                   final String attribute1_name,
+                                   final String attribute1_value,
+                                   final String attribute2_name,
+                                   final String attribute2_value,
+                                   final String attribute3_name,
+                                   final String attribute3_value,
+                                   final String attribute4_name,
+                                   final String attribute4_value ) throws IOException {
+        w.write( "<" );
+        w.write( element_name );
+        if ( !ForesterUtil.isEmpty( attribute1_value ) ) {
+            w.write( " " );
+            w.write( attribute1_name );
+            w.write( "=\"" );
+            w.write( attribute1_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute2_value ) ) {
+            w.write( " " );
+            w.write( attribute2_name );
+            w.write( "=\"" );
+            w.write( attribute2_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute3_value ) ) {
+            w.write( " " );
+            w.write( attribute3_name );
+            w.write( "=\"" );
+            w.write( attribute3_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute4_value ) ) {
+            w.write( " " );
+            w.write( attribute4_name );
+            w.write( "=\"" );
+            w.write( attribute4_value );
+            w.write( "\"" );
+        }
+        w.write( ">" );
+    }
+
+    public static void appendOpen( final Writer w,
+                                   final String element_name,
+                                   final String attribute1_name,
+                                   final String attribute1_value,
+                                   final String attribute2_name,
+                                   final String attribute2_value,
+                                   final String attribute3_name,
+                                   final String attribute3_value,
+                                   final String attribute4_name,
+                                   final String attribute4_value,
+                                   final String attribute5_name,
+                                   final String attribute5_value ) throws IOException {
+        w.write( "<" );
+        w.write( element_name );
+        if ( !ForesterUtil.isEmpty( attribute1_value ) ) {
+            w.write( " " );
+            w.write( attribute1_name );
+            w.write( "=\"" );
+            w.write( attribute1_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute2_value ) ) {
+            w.write( " " );
+            w.write( attribute2_name );
+            w.write( "=\"" );
+            w.write( attribute2_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute3_value ) ) {
+            w.write( " " );
+            w.write( attribute3_name );
+            w.write( "=\"" );
+            w.write( attribute3_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute4_value ) ) {
+            w.write( " " );
+            w.write( attribute4_name );
+            w.write( "=\"" );
+            w.write( attribute4_value );
+            w.write( "\"" );
+        }
+        if ( !ForesterUtil.isEmpty( attribute5_value ) ) {
+            w.write( " " );
+            w.write( attribute5_name );
+            w.write( "=\"" );
+            w.write( attribute5_value );
+            w.write( "\"" );
+        }
+        w.write( ">" );
+    }
+
+    /**
+     * Creates a deep copy of ArrayList of PhylogenyData objects.
+     * 
+     * @param list
+     *            an ArrayList of PhylogenyData objects
+     * @return a deep copy of ArrayList list
+     */
+    public static ArrayList<PhylogenyData> copy( final ArrayList<PhylogenyData> list ) {
+        final ArrayList<PhylogenyData> l = new ArrayList<PhylogenyData>( list.size() );
+        for( int i = 0; i < list.size(); ++i ) {
+            l.add( ( list.get( i ) ).copy() );
+        }
+        return l;
+    }
+
+    public static void drawLine( final double x1, final double y1, final double x2, final double y2, final Graphics g ) {
+        g.drawLine( org.forester.util.ForesterUtil.roundToInt( x1 ),
+                    org.forester.util.ForesterUtil.roundToInt( y1 ),
+                    org.forester.util.ForesterUtil.roundToInt( x2 ),
+                    org.forester.util.ForesterUtil.roundToInt( y2 ) );
+    }
+
+    public static void drawString( final String str, final double x, final double y, final Graphics g ) {
+        g.drawString( str, org.forester.util.ForesterUtil.roundToInt( x ), org.forester.util.ForesterUtil
+                .roundToInt( y ) );
+    }
+
+    public static String replaceIllegalXmlCharacters( final String value ) {
+        String v = value.replaceAll( "&", "&amp;" );
+        v = v.replaceAll( "<", "&lt;" );
+        v = v.replaceAll( ">", "&gt;" );
+        v = v.replaceAll( "'", "&apos;" );
+        v = v.replaceAll( "\"", "&quot;" );
+        return v;
+    }
+}
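
A minimal sketch of the low-level append methods above, written to a StringWriter; the element and attribute names are illustrative. Note that the bare three-argument appendElement does not emit line separators or indentation, while the overloads taking an indentation string do.

import java.io.StringWriter;

import org.forester.phylogeny.data.PhylogenyDataUtil;

public class XmlAppendSketch {

    public static void main( final String[] args ) throws Exception {
        final StringWriter w = new StringWriter();
        PhylogenyDataUtil.appendOpen( w, "annotation", "type", "free_text" ); // <annotation type="free_text">
        PhylogenyDataUtil.appendElement( w, "desc", "5' <UTR> & more" );      // value is XML-escaped
        PhylogenyDataUtil.appendClose( w, "annotation" );                     // </annotation>
        System.out.println( w );
        // prints: <annotation type="free_text"><desc>5&apos; &lt;UTR&gt; &amp; more</desc></annotation>
    }
}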
diff --git a/forester/java/src/org/forester/phylogeny/data/Point.java b/forester/java/src/org/forester/phylogeny/data/Point.java
new file mode 100644 (file)
index 0000000..a20b3d9
--- /dev/null
@@ -0,0 +1,152 @@
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.math.BigDecimal;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class Point implements PhylogenyData {
+
+    private final String       _geodetic_datum;
+    private final BigDecimal   _lat;
+    private final BigDecimal   _long;
+    private final BigDecimal   _alt;
+    private final String       _alt_unit;
+    public static final String UNKNOWN_GEODETIC_DATUM = "?";
+
+    public Point() {
+        this( UNKNOWN_GEODETIC_DATUM, null, null, null, "" );
+    }
+
+    public Point( final String geodetic_datum, final BigDecimal lat, final BigDecimal longitude ) {
+        this( geodetic_datum, lat, longitude, null, "" );
+    }
+
+    public boolean isEmpty() {
+        return ( _lat == null ) && ( _long == null ) && ( _alt == null );
+    }
+
+    public Point( final String geodetic_datum,
+                  final BigDecimal lat,
+                  final BigDecimal longitude,
+                  final BigDecimal alt,
+                  final String alt_unit ) {
+        if ( ForesterUtil.isEmpty( geodetic_datum ) ) {
+            throw new IllegalArgumentException( "illegal attempt to use empty geodetic datum" );
+        }
+        if ( ( alt != null ) && ForesterUtil.isEmpty( alt_unit ) ) {
+            throw new IllegalArgumentException( "altitude must hava a unit" );
+        }
+        _geodetic_datum = geodetic_datum;
+        _lat = lat;
+        _long = longitude;
+        _alt = alt;
+        _alt_unit = alt_unit;
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        if ( isEmpty() ) {
+            return new StringBuffer();
+        }
+        else if ( getAltitude() == null ) {
+            return new StringBuffer( "[" + getLatitude().toPlainString() + ", " + getLongitude() + "]" );
+        }
+        else {
+            return new StringBuffer( "[" + getLatitude().toPlainString() + ", " + getLongitude() + ", " + getAltitude()
+                    + getAltiudeUnit() + "]" );
+        }
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        return new Point( getGeodeticDatum(),
+                          getLatitude() == null ? null : new BigDecimal( getLatitude().toPlainString() ),
+                          getLongitude() == null ? null : new BigDecimal( getLongitude().toPlainString() ),
+                          getAltitude() == null ? null : new BigDecimal( getAltitude().toPlainString() ),
+                          getAltiudeUnit() );
+    }
+
+    public BigDecimal getAltitude() {
+        return _alt;
+    }
+
+    public String getAltiudeUnit() {
+        return _alt_unit;
+    }
+
+    public String getGeodeticDatum() {
+        return _geodetic_datum;
+    }
+
+    public BigDecimal getLatitude() {
+        return _lat;
+    }
+
+    public BigDecimal getLongitude() {
+        return _long;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData point ) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isEmpty() ) {
+            return;
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        if ( getAltitude() != null ) {
+            PhylogenyDataUtil.appendOpen( writer,
+                                          PhyloXmlMapping.POINT,
+                                          PhyloXmlMapping.POINT_GEODETIC_DATUM,
+                                          getGeodeticDatum(),
+                                          PhyloXmlMapping.POINT_ALTITUDE_UNIT_ATTR,
+                                          getAltiudeUnit() );
+        }
+        else {
+            PhylogenyDataUtil.appendOpen( writer,
+                                          PhyloXmlMapping.POINT,
+                                          PhyloXmlMapping.POINT_GEODETIC_DATUM,
+                                          getGeodeticDatum() );
+        }
+        PhylogenyDataUtil.appendElement( writer,
+                                         PhyloXmlMapping.POINT_LATITUDE,
+                                         getLatitude().toPlainString(),
+                                         indentation );
+        PhylogenyDataUtil.appendElement( writer,
+                                         PhyloXmlMapping.POINT_LONGITUDE,
+                                         getLongitude().toPlainString(),
+                                         indentation );
+        if ( getAltitude() != null ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.POINT_ALTITUDE,
+                                             getAltitude().toPlainString(),
+                                             indentation );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.POINT );
+    }
+
+    @Override
+    public String toString() {
+        return asSimpleText().toString();
+    }
+}
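
A minimal usage sketch for Point, using the three-argument constructor and methods shown above; the datum and coordinate values are example data.

import java.io.StringWriter;
import java.math.BigDecimal;

import org.forester.phylogeny.data.Point;

public class PointSketch {

    public static void main( final String[] args ) throws Exception {
        final Point p = new Point( "WGS84",                        // geodetic datum (example value)
                                   new BigDecimal( "32.880" ),     // latitude
                                   new BigDecimal( "-117.217" ) ); // longitude
        System.out.println( p.asSimpleText() );                    // prints "[32.880, -117.217]"
        final StringWriter w = new StringWriter();
        p.toPhyloXML( w, 0, "  " );                                // point element with its geodetic datum
        System.out.println( w );                                   // attribute and lat/long child elements
    }
}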
diff --git a/forester/java/src/org/forester/phylogeny/data/Polygon.java b/forester/java/src/org/forester/phylogeny/data/Polygon.java
new file mode 100644 (file)
index 0000000..427d294
--- /dev/null
@@ -0,0 +1,109 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class Polygon implements PhylogenyData {
+
+    private final List<Point> _points;
+
+    public Polygon( final List<Point> points ) {
+        _points = points;
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        final StringBuffer sb = new StringBuffer();
+        boolean first = true;
+        for( final Point point : getPoints() ) {
+            if ( first ) {
+                first = false;
+            }
+            else {
+                sb.append( ForesterUtil.LINE_SEPARATOR );
+            }
+            sb.append( point.asSimpleText() );
+        }
+        return sb;
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        final List<Point> new_points = new ArrayList<Point>();
+        for( final Point point : getPoints() ) {
+            new_points.add( ( Point ) point.copy() );
+        }
+        return new Polygon( new_points );
+    }
+
+    public List<Point> getPoints() {
+        return _points;
+    }
+
+    public boolean isEmpty() {
+        return ForesterUtil.isEmpty( _points );
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isEmpty() ) {
+            return;
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.POLYGON );
+        for( final Point point : getPoints() ) {
+            point.toPhyloXML( writer, level, indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE );
+            writer.write( indentation );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.POLYGON );
+    }
+}
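
A minimal usage sketch for Polygon, built from the Point constructor shown earlier in this commit; the class name and the coordinates are illustrative.

import java.math.BigDecimal;
import java.util.Arrays;

import org.forester.phylogeny.data.Point;
import org.forester.phylogeny.data.Polygon;

public class PolygonSketch {

    public static void main( final String[] args ) {
        final Point a = new Point( "WGS84", new BigDecimal( "0" ), new BigDecimal( "0" ) );
        final Point b = new Point( "WGS84", new BigDecimal( "0" ), new BigDecimal( "10" ) );
        final Point c = new Point( "WGS84", new BigDecimal( "10" ), new BigDecimal( "10" ) );
        final Polygon polygon = new Polygon( Arrays.asList( a, b, c ) );
        System.out.println( polygon.isEmpty() );                 // false
        System.out.println( polygon.asSimpleText() );            // one point per line
        final Polygon copy = ( Polygon ) polygon.copy();         // deep copy: each Point is copied
        System.out.println( copy.getPoints().size() );           // 3
    }
}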
diff --git a/forester/java/src/org/forester/phylogeny/data/PropertiesMap.java b/forester/java/src/org/forester/phylogeny/data/PropertiesMap.java
new file mode 100644 (file)
index 0000000..195590f
--- /dev/null
@@ -0,0 +1,205 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.util.ForesterUtil;
+
+public class PropertiesMap implements PhylogenyData {
+
+    private final SortedMap<String, Property> _properties;
+
+    public PropertiesMap() {
+        _properties = new TreeMap<String, Property>();
+    }
+
+    public int size() {
+        return _properties.size();
+    }
+
+    public void addProperty( final Property property ) throws IllegalArgumentException {
+        if ( getProperties().containsKey( property.getRef() ) ) {
+            throw new IllegalArgumentException( "ref [" + property.getRef() + "] is already present" );
+        }
+        getProperties().put( property.getRef(), property );
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        final StringBuffer sb = new StringBuffer();
+        boolean first = true;
+        for( final String ref : getPropertyRefs() ) {
+            if ( first ) {
+                first = false;
+            }
+            else {
+                sb.append( " " );
+            }
+            sb.append( getProperty( ref ).asText() );
+        }
+        return sb;
+    }
+
+    @Override
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        final PropertiesMap new_one = new PropertiesMap();
+        for( final String r : getProperties().keySet() ) {
+            new_one.addProperty( getProperties().get( r ) );
+        }
+        return new_one;
+    }
+
+    public SortedMap<String, Property> getProperties() {
+        return _properties;
+    }
+
+    public Property[] getPropertiesArray() {
+        final Property[] a = new Property[ getProperties().size() ];
+        int i = 0;
+        for( final String ref : getProperties().keySet() ) {
+            a[ i++ ] = getProperties().get( ref );
+        }
+        return a;
+    }
+
+    public List<Property> getPropertiesWithGivenReferencePrefix( final String ref_prefix )
+            throws IllegalArgumentException {
+        if ( ForesterUtil.isEmpty( ref_prefix ) ) {
+            throw new IllegalArgumentException( "reference prefix is null or empty" );
+        }
+        final String my_ref_prefix = new String( ref_prefix.trim() );
+        final List<Property> props = new ArrayList<Property>();
+        for( final String ref : getProperties().keySet() ) {
+            if ( ref.startsWith( my_ref_prefix ) ) {
+                props.add( getProperty( ref ) );
+            }
+        }
+        return props;
+    }
+
+    public Property getProperty( final String ref ) throws IllegalArgumentException {
+        if ( getProperties().containsKey( ref ) ) {
+            return getProperties().get( ref );
+        }
+        else {
+            throw new IllegalArgumentException( "reference [" + ref + "] is not present" );
+        }
+    }
+
+    /**
+     * Returns all property refs of this PropertiesMap as a String array.
+     */
+    public String[] getPropertyRefs() {
+        if ( getProperties() == null ) {
+            return new String[ 0 ];
+        }
+        final Property[] properties = getPropertiesArray();
+        final String[] refs = new String[ properties.length ];
+        for( int i = 0; i < properties.length; ++i ) {
+            refs[ i ] = properties[ i ].getRef();
+        }
+        return refs;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    public boolean refExists( final String ref ) {
+        if ( getProperties() != null ) {
+            for( final String r : getProperties().keySet() ) {
+                if ( r.equalsIgnoreCase( ref ) ) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    public Property removeProperty( final String ref ) throws IllegalArgumentException {
+        if ( getProperties().containsKey( ref ) ) {
+            return getProperties().remove( ref );
+        }
+        else {
+            throw new IllegalArgumentException( "reference [" + ref + "] is not present" );
+        }
+    }
+
+    public List<String> removePropertiesWithGivenReferencePrefix( final String ref_prefix )
+            throws IllegalArgumentException {
+        if ( ForesterUtil.isEmpty( ref_prefix ) ) {
+            throw new IllegalArgumentException( "reference prefix is null or empty" );
+        }
+        final String my_ref_prefix = new String( ref_prefix.trim() );
+        final List<String> to_remove = new ArrayList<String>();
+        for( final String ref : getProperties().keySet() ) {
+            if ( ref.startsWith( my_ref_prefix ) ) {
+                to_remove.add( ref );
+            }
+        }
+        for( final String ref : to_remove ) {
+            getProperties().remove( ref );
+        }
+        return to_remove;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        if ( getProperties() != null ) {
+            for( final String ref : getProperties().keySet() ) {
+                sb.append( getProperties().get( ref ).toNHX() );
+            }
+        }
+        return sb;
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( getProperties() != null ) {
+            for( final String ref : getProperties().keySet() ) {
+                getProperties().get( ref ).toPhyloXML( writer, level, indentation );
+            }
+        }
+    }
+
+    @Override
+    public String toString() {
+        return asSimpleText().toString();
+    }
+}
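
A minimal usage sketch for PropertiesMap, using the Property constructor and the Property.AppliesTo enum that are part of this commit (see the imports in NodeData above); the "AFFY:" refs and values are example data. The ref, and the datatype when non-empty, must contain a ":", as enforced by the Property constructor in the next file.

import org.forester.phylogeny.data.PropertiesMap;
import org.forester.phylogeny.data.Property;
import org.forester.phylogeny.data.Property.AppliesTo;

public class PropertiesMapSketch {

    public static void main( final String[] args ) {
        final PropertiesMap map = new PropertiesMap();
        map.addProperty( new Property( "AFFY:expression", "0.2", "", "xsd:double", AppliesTo.NODE ) );
        map.addProperty( new Property( "AFFY:confidence", "0.9", "", "xsd:double", AppliesTo.NODE ) );
        System.out.println( map.size() );                                                  // 2
        System.out.println( map.getProperty( "AFFY:expression" ).asText() );               // AFFY:expression: 0.2
        System.out.println( map.getPropertiesWithGivenReferencePrefix( "AFFY:" ).size() ); // 2
    }
}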
diff --git a/forester/java/src/org/forester/phylogeny/data/Property.java b/forester/java/src/org/forester/phylogeny/data/Property.java
new file mode 100644 (file)
index 0000000..9fd9904
--- /dev/null
@@ -0,0 +1,332 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.StringTokenizer;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class Property implements PhylogenyData {
+
+    private String          _value;
+    private final String    _ref;
+    private final String    _unit;
+    private final String    _datatype;
+    private final AppliesTo _applies_to;
+    private final String    _id_ref;
+
+    public Property( final String ref,
+                     final String value,
+                     final String unit,
+                     final String datatype,
+                     final AppliesTo applies_to ) {
+        this( ref, value, unit, datatype, applies_to, "" );
+    }
+
+    // Only used by method createFromNhxString.
+    private Property( final String ref,
+                      final String value,
+                      final String unit,
+                      final String datatype,
+                      final AppliesTo applies_to,
+                      final boolean dummy ) {
+        _ref = ref;
+        _unit = unit;
+        _datatype = datatype;
+        _applies_to = applies_to;
+        _id_ref = "";
+        setValue( value );
+    }
+
+    public Property( final String ref,
+                     final String value,
+                     final String unit,
+                     final String datatype,
+                     final AppliesTo applies_to,
+                     final String id_ref ) {
+        if ( !ForesterUtil.isEmpty( ref ) && ( ref.indexOf( ":" ) < 1 ) ) {
+            throw new IllegalArgumentException( "property reference [" + ref
+                    + "] is not in the expected format (missing a \":\")" );
+        }
+        if ( !ForesterUtil.isEmpty( unit ) && ( unit.indexOf( ":" ) < 1 ) ) {
+            throw new IllegalArgumentException( "property unit [" + unit
+                    + "] is not in the expected format (missing a \":\")" );
+        }
+        if ( !ForesterUtil.isEmpty( datatype ) && ( datatype.indexOf( ":" ) < 1 ) ) {
+            throw new IllegalArgumentException( "property datatype [" + datatype
+                    + "] is not in the expected format (missing a \":\")" );
+        }
+        _ref = ref;
+        _unit = unit;
+        _datatype = datatype;
+        _applies_to = applies_to;
+        _id_ref = id_ref;
+        setValue( value );
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getValue() );
+    }
+
+    @Override
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getRef() );
+        sb.append( ": " );
+        sb.append( getValue() );
+        if ( !ForesterUtil.isEmpty( getUnit() ) ) {
+            sb.append( getUnit() );
+        }
+        return sb;
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        return new Property( getRef(), getValue(), getUnit(), getDataType(), getAppliesTo(), getIdRef() );
+    }
+
+    public AppliesTo getAppliesTo() {
+        return _applies_to;
+    }
+
+    public String getDataType() {
+        return _datatype;
+    }
+
+    public String getIdRef() {
+        return _id_ref;
+    }
+
+    public String getRef() {
+        return _ref;
+    }
+
+    public String getUnit() {
+        return _unit;
+    }
+
+    public String getValue() {
+        return _value;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        if ( data == null ) {
+            return false;
+        }
+        return ( ( Property ) data ).getValue().equals( getValue() )
+                && ( ( Property ) data ).getUnit().equals( getUnit() )
+                && ( ( Property ) data ).getRef().equals( getRef() );
+    }
+
+    public void setValue( final String value ) {
+        _value = value;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        final StringBuffer nhx = new StringBuffer();
+        nhx.append( ":X" );
+        switch ( getAppliesTo() ) {
+            case CLADE:
+                nhx.append( "C=" );
+                break;
+            case NODE:
+                nhx.append( "N=" );
+                break;
+            case PARENT_BRANCH:
+                nhx.append( "B=" );
+                break;
+            case PHYLOGENY:
+                nhx.append( "P=" );
+                break;
+            case ANNOTATION:
+                nhx.append( "S=" );
+                break;
+            default:
+                nhx.append( "O=" );
+                break;
+        }
+        if ( !getDataType().equals( "" ) ) {
+            if ( getDataType().equals( "xsd:string" ) ) {
+                nhx.append( "S=" );
+            }
+            else if ( getDataType().equals( "xsd:long" ) ) {
+                nhx.append( "L=" );
+            }
+            else if ( getDataType().equals( "xsd:decimal" ) ) {
+                nhx.append( "D=" );
+            }
+            else if ( getDataType().equals( "xsd:boolean" ) ) {
+                nhx.append( "B=" );
+            }
+            else if ( getDataType().equals( "xsd:anyURI" ) ) {
+                nhx.append( "U=" );
+            }
+        }
+        nhx.append( getRef() );
+        nhx.append( "=" );
+        nhx.append( getValue() );
+        if ( !getUnit().equals( "" ) ) {
+            nhx.append( "=" );
+            nhx.append( getUnit() );
+        }
+        return nhx;
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        PhylogenyDataUtil.appendElement( writer,
+                                         PhyloXmlMapping.PROPERTY,
+                                         getValue(),
+                                         PhyloXmlMapping.PROPERTY_REF,
+                                         getRef(),
+                                         PhyloXmlMapping.PROPERTY_UNIT,
+                                         getUnit(),
+                                         PhyloXmlMapping.PROPERTY_DATATYPE,
+                                         getDataType(),
+                                         PhyloXmlMapping.PROPERTY_APPLIES_TO,
+                                         getAppliesTo().toString(),
+                                         PhyloXmlMapping.ID_REF,
+                                         getIdRef(),
+                                         indentation );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+
+    public static Property createFromNhxString( final String nhx ) throws IllegalArgumentException {
+        final StringTokenizer st = new StringTokenizer( nhx, "=" );
+        final int tokens = st.countTokens();
+        final String error = "error in NHX property tag format: "
+                + "expected: X[N|B|C|S|T|P|O]=<datatype>=<ref>=<value>[=<unit>], got: \"" + nhx + "\" instead";
+        if ( ( tokens != 4 ) && ( tokens != 5 ) ) {
+            throw new IllegalArgumentException( error );
+        }
+        final String first = st.nextToken();
+        AppliesTo applies_to = null;
+        if ( first.equals( "XN" ) ) {
+            applies_to = AppliesTo.NODE;
+        }
+        else if ( first.equals( "XB" ) ) {
+            applies_to = AppliesTo.PARENT_BRANCH;
+        }
+        else if ( first.equals( "XC" ) ) {
+            applies_to = AppliesTo.CLADE;
+        }
+        else if ( first.equals( "XS" ) ) {
+            applies_to = AppliesTo.ANNOTATION;
+        }
+        else if ( first.equals( "XT" ) ) {
+            applies_to = AppliesTo.OTHER;
+        }
+        else if ( first.equals( "XP" ) ) {
+            applies_to = AppliesTo.PHYLOGENY;
+        }
+        else if ( first.equals( "XO" ) ) {
+            applies_to = AppliesTo.OTHER;
+        }
+        else {
+            throw new IllegalArgumentException( error );
+        }
+        String datatype = st.nextToken();
+        if ( datatype.equals( "S" ) ) {
+            datatype = "xsd:string";
+        }
+        else if ( datatype.equals( "L" ) ) {
+            datatype = "xsd:long";
+        }
+        else if ( datatype.equals( "D" ) ) {
+            datatype = "xsd:decimal";
+        }
+        else if ( datatype.equals( "B" ) ) {
+            datatype = "xsd:boolean";
+        }
+        else if ( datatype.equals( "U" ) ) {
+            datatype = "xsd:anyURI";
+        }
+        final String ref = st.nextToken();
+        final String value = st.nextToken();
+        String unit = "";
+        if ( tokens == 5 ) {
+            unit = st.nextToken();
+        }
+        return new Property( ref, value, unit, datatype, applies_to, true );
+    }
+
+    public static enum AppliesTo {
+        PHYLOGENY {
+
+            @Override
+            public String toString() {
+                return "phylogeny";
+            }
+        },
+        CLADE {
+
+            @Override
+            public String toString() {
+                return "clade";
+            }
+        },
+        NODE {
+
+            @Override
+            public String toString() {
+                return "node";
+            }
+        },
+        ANNOTATION {
+
+            @Override
+            public String toString() {
+                return "annotation";
+            }
+        },
+        PARENT_BRANCH {
+
+            @Override
+            public String toString() {
+                return "parent_branch";
+            }
+        },
+        OTHER {
+
+            @Override
+            public String toString() {
+                return "other";
+            }
+        }
+    }
+}
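
A minimal usage sketch of the NHX property handling above, assuming the class compiles as shown; the driver class, the reference name "AFFY:expression", and the value/unit strings are made up for illustration. The expected console output is noted in the trailing comments.

    import org.forester.phylogeny.data.Property;

    // Hypothetical driver class, not part of this commit.
    public class PropertyNhxDemo {

        public static void main( final String[] args ) {
            // NHX property tag format: X[N|B|C|S|T|P|O]=<datatype>=<ref>=<value>[=<unit>]
            final Property p = Property.createFromNhxString( "XN=S=AFFY:expression=0.2=molar" );
            System.out.println( p.getRef() );        // AFFY:expression
            System.out.println( p.getValue() );      // 0.2
            System.out.println( p.getUnit() );       // molar
            System.out.println( p.getAppliesTo() );  // node
            System.out.println( p.toNHX() );         // :XN=S=AFFY:expression=0.2=molar
        }
    }
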
diff --git a/forester/java/src/org/forester/phylogeny/data/ProteinDomain.java b/forester/java/src/org/forester/phylogeny/data/ProteinDomain.java
new file mode 100644 (file)
index 0000000..81155ff
--- /dev/null
@@ -0,0 +1,171 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.util.ForesterUtil;
+
+public class ProteinDomain implements PhylogenyData {
+
+    final public static double CONFIDENCE_DEFAULT = 0.0;
+    final public static String IDENTIFIER_DEFAULT = "";
+    final private String       _name;
+    final private int          _from;
+    final private int          _to;
+    final private String       _id;
+    final private double       _confidence;
+
+    public ProteinDomain( final String name, final int from, final int to ) {
+        this( name, from, to, ProteinDomain.IDENTIFIER_DEFAULT, ProteinDomain.CONFIDENCE_DEFAULT );
+    }
+
+    public ProteinDomain( final String name, final int from, final int to, final double confidence ) {
+        this( name, from, to, ProteinDomain.IDENTIFIER_DEFAULT, confidence );
+    }
+
+    public ProteinDomain( final String name, final int from, final int to, final String id ) {
+        this( name, from, to, id, ProteinDomain.CONFIDENCE_DEFAULT );
+    }
+
+    public ProteinDomain( final String name, final int from, final int to, final String id, final double confidence ) {
+        if ( ( from >= to ) || ( to < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain from " + from + " to " + to );
+        }
+        _name = name;
+        _from = from;
+        _to = to;
+        _id = id;
+        _confidence = confidence;
+    }
+
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getName() );
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer( getName() );
+        sb.append( " [" );
+        sb.append( getLength() );
+        if ( !ForesterUtil.isEmpty( getId() ) ) {
+            sb.append( " " );
+            sb.append( getId() );
+        }
+        if ( getConfidence() != CONFIDENCE_DEFAULT ) {
+            sb.append( " " );
+            sb.append( getConfidence() );
+        }
+        sb.append( "]" );
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        if ( getId() == null ) {
+            return new ProteinDomain( getName(), getFrom(), getTo(), getConfidence() );
+        }
+        return new ProteinDomain( getName(), getFrom(), getTo(), getId(), getConfidence() );
+    }
+
+    public double getConfidence() {
+        return _confidence;
+    }
+
+    public int getFrom() {
+        return _from;
+    }
+
+    public String getId() {
+        return _id;
+    }
+
+    public int getLength() {
+        return ( getTo() - getFrom() + 1 );
+    }
+
+    public String getName() {
+        return _name;
+    }
+
+    public int getTo() {
+        return _to;
+    }
+
+    public boolean isEqual( final PhylogenyData protein_domain ) {
+        if ( protein_domain == null ) {
+            return false;
+        }
+        if ( !( protein_domain instanceof ProteinDomain ) ) {
+            return false;
+        }
+        else if ( ( ( ProteinDomain ) protein_domain ).getLength() != getLength() ) {
+            return false;
+        }
+        else if ( !( ( ProteinDomain ) protein_domain ).getName().equals( getName() ) ) {
+            return false;
+        }
+        return true;
+    }
+
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        if ( getId() != null ) {
+            PhylogenyDataUtil.appendOpen( writer,
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN,
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM,
+                                          getFrom() + "",
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO,
+                                          getTo() + "",
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE,
+                                          getConfidence() + "",
+                                          PhyloXmlMapping.IDENTIFIER,
+                                          getId() );
+        }
+        else {
+            PhylogenyDataUtil.appendOpen( writer,
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN,
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM,
+                                          getFrom() + "",
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO,
+                                          getTo() + "",
+                                          PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE,
+                                          getConfidence() + "" );
+        }
+        writer.write( getName() );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
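
A short usage sketch for ProteinDomain above; the domain name "SH2", the coordinates, and the Pfam-style identifier "PF00017" are illustrative values only. Coordinates are treated as inclusive (length = to - from + 1), and the constructor rejects from >= to.

    import org.forester.phylogeny.data.ProteinDomain;

    // Hypothetical driver class, not part of this commit.
    public class ProteinDomainDemo {

        public static void main( final String[] args ) {
            final ProteinDomain d = new ProteinDomain( "SH2", 10, 110, "PF00017", 0.001 );
            System.out.println( d.getLength() ); // 101
            System.out.println( d.asText() );    // SH2 [101 PF00017 0.001]
        }
    }
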
diff --git a/forester/java/src/org/forester/phylogeny/data/Reference.java b/forester/java/src/org/forester/phylogeny/data/Reference.java
new file mode 100644 (file)
index 0000000..01118a5
--- /dev/null
@@ -0,0 +1,117 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.util.ForesterUtil;
+
+public class Reference implements PhylogenyData {
+
+    String _desc;
+    String _doi;
+
+    public Reference( final String desc ) {
+        _desc = desc;
+        _doi = "";
+    }
+
+    public Reference( final String desc, final String doi ) {
+        _desc = desc;
+        _doi = doi;
+    }
+
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getDescription() );
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getDoi() ) ) {
+            sb.append( "[doi:" );
+            sb.append( getDoi() );
+            sb.append( "] " );
+        }
+        sb.append( getDescription() );
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        return new Reference( getDescription(), getDoi() );
+    }
+
+    public String getDoi() {
+        return _doi;
+    }
+
+    public String getDescription() {
+        return _desc;
+    }
+
+    public boolean isEqual( final PhylogenyData data ) {
+        if ( ( data == null ) || ( getDescription() == null ) ) {
+            return false;
+        }
+        return ( ( Reference ) data ).getDescription().equals( getDescription() )
+                && ( ( Reference ) data ).getDoi().equals( getDoi() );
+    }
+
+    public void setDoi( final String doi ) {
+        if ( !ForesterUtil.isEmpty( doi ) && !PhyloXmlUtil.LIT_REF_DOI_PATTERN.matcher( doi ).matches() ) {
+            throw new PhyloXmlDataFormatException( "illegal doi: [" + doi + "]" );
+        }
+        _doi = doi;
+    }
+
+    public void setValue( final String value ) {
+        _desc = value;
+    }
+
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.REFERENCE, PhyloXmlMapping.REFERENCE_DOI_ATTR, getDoi() );
+        if ( !ForesterUtil.isEmpty( getDescription() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.REFERENCE_DESC, getDescription(), indentation );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.REFERENCE );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
\ No newline at end of file
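
A brief usage sketch for Reference above; the description and DOI strings are invented placeholders. Note that only setDoi() validates the DOI (against PhyloXmlUtil.LIT_REF_DOI_PATTERN, defined elsewhere in this commit); the two-argument constructor accepts the string as-is.

    import org.forester.phylogeny.data.Reference;

    // Hypothetical driver class, not part of this commit.
    public class ReferenceDemo {

        public static void main( final String[] args ) {
            final Reference ref = new Reference( "An example publication", "10.1000/example" );
            System.out.println( ref.asText() ); // [doi:10.1000/example] An example publication
        }
    }
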
diff --git a/forester/java/src/org/forester/phylogeny/data/Sequence.java b/forester/java/src/org/forester/phylogeny/data/Sequence.java
new file mode 100644 (file)
index 0000000..27b3b0c
--- /dev/null
@@ -0,0 +1,388 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.util.ForesterUtil;
+
+public class Sequence implements PhylogenyData, MultipleUris {
+
+    private String                 _mol_sequence;
+    private boolean                _mol_sequence_is_aligned;
+    private String                 _name;
+    private String                 _source_id;
+    private Accession              _accession;
+    private String                 _symbol;
+    private String                 _location;
+    private String                 _type;
+    private SortedSet<Annotation>  _annotations;
+    private DomainArchitecture     _da;
+    private List<Uri>              _uris;
+    private List<SequenceRelation> _seq_relations;
+
+    public Sequence() {
+        init();
+    }
+
+    public boolean isEmpty() {
+        return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() )
+                && ForesterUtil.isEmpty( getType() ) && ForesterUtil.isEmpty( getLocation() )
+                && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() )
+                && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations )
+                && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations );
+    }
+
+    public void addAnnotation( final Annotation annotation ) {
+        getAnnotations().add( annotation );
+    }
+
+    public void addUri( final Uri uri ) {
+        if ( getUris() == null ) {
+            setUris( new ArrayList<Uri>() );
+        }
+        getUris().add( uri );
+    }
+
+    public void addSequenceRelation( final SequenceRelation sr ) {
+        // Use the lazy accessor: init() leaves _seq_relations null.
+        getSequenceRelations().add( sr );
+    }
+
+    public StringBuffer asSimpleText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( getAccession() != null ) {
+            sb.append( "[" );
+            sb.append( getAccession() );
+            sb.append( "] " );
+        }
+        if ( !ForesterUtil.isEmpty( getName() ) ) {
+            sb.append( getName() );
+            sb.append( " " );
+        }
+        if ( !ForesterUtil.isEmpty( getLocation() ) ) {
+            sb.append( getLocation() );
+        }
+        return sb;
+    }
+
+    public StringBuffer asText() {
+        return asSimpleText();
+    }
+
+    /**
+     * Not a deep copy: the annotation set is shared between the original and
+     * the copy, and sequence relations are not copied at all.
+     */
+    public PhylogenyData copy() {
+        final Sequence seq = new Sequence();
+        seq.setAnnotations( getAnnotations() );
+        seq.setName( getName() );
+        seq.setSymbol( getSymbol() );
+        seq.setMolecularSequence( getMolecularSequence() );
+        seq.setMolecularSequenceAligned( isMolecularSequenceAligned() );
+        seq.setLocation( getLocation() );
+        if ( getAccession() != null ) {
+            seq.setAccession( ( Accession ) getAccession().copy() );
+        }
+        else {
+            seq.setAccession( null );
+        }
+        seq.setType( getType() );
+        if ( getUris() != null ) {
+            seq.setUris( new ArrayList<Uri>() );
+            for( final Uri uri : getUris() ) {
+                if ( uri != null ) {
+                    seq.getUris().add( uri );
+                }
+            }
+        }
+        if ( getDomainArchitecture() != null ) {
+            seq.setDomainArchitecture( ( DomainArchitecture ) getDomainArchitecture().copy() );
+        }
+        else {
+            seq.setDomainArchitecture( null );
+        }
+        return seq;
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            return false;
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return isEqual( ( Sequence ) o );
+        }
+    }
+
+    public Accession getAccession() {
+        return _accession;
+    }
+
+    public Annotation getAnnotation( final int i ) {
+        return ( Annotation ) getAnnotations().toArray()[ i ];
+    }
+
+    public SortedSet<Annotation> getAnnotations() {
+        if ( _annotations == null ) {
+            _annotations = new TreeSet<Annotation>();
+        }
+        return _annotations;
+    }
+
+    public DomainArchitecture getDomainArchitecture() {
+        return _da;
+    }
+
+    public String getLocation() {
+        return _location;
+    }
+
+    public String getMolecularSequence() {
+        return _mol_sequence;
+    }
+
+    public boolean isMolecularSequenceAligned() {
+        return _mol_sequence_is_aligned;
+    }
+
+    public String getName() {
+        return _name;
+    }
+
+    public List<SequenceRelation> getSequenceRelations() {
+        if ( _seq_relations == null ) {
+            _seq_relations = new ArrayList<SequenceRelation>();
+        }
+        return _seq_relations;
+    }
+
+    private void setSequenceRelations( final List<SequenceRelation> seq_relations ) {
+        _seq_relations = seq_relations;
+    }
+
+    public String getSourceId() {
+        return _source_id;
+    }
+
+    public String getSymbol() {
+        return _symbol;
+    }
+
+    public String getType() {
+        return _type;
+    }
+
+    public List<Uri> getUris() {
+        return _uris;
+    }
+
+    public Uri getUri( final int index ) {
+        return getUris().get( index );
+    }
+
+    @Override
+    public int hashCode() {
+        if ( getAccession() != null ) {
+            return getAccession().hashCode();
+        }
+        int result = getSymbol().hashCode();
+        if ( getName().length() > 0 ) {
+            result ^= getName().hashCode();
+        }
+        if ( getMolecularSequence().length() > 0 ) {
+            result ^= getMolecularSequence().hashCode();
+        }
+        return result;
+    }
+
+    public boolean hasSequenceRelations() {
+        return ( _seq_relations != null ) && ( _seq_relations.size() > 0 );
+    }
+
+    public void init() {
+        setAnnotations( null );
+        setName( "" );
+        setMolecularSequence( "" );
+        setMolecularSequenceAligned( false );
+        setLocation( "" );
+        setAccession( null );
+        setSymbol( "" );
+        setType( "" );
+        setDomainArchitecture( null );
+        setUris( null );
+        setSequenceRelations( null );
+        setSourceId( null );
+    }
+
+    public boolean isEqual( final PhylogenyData data ) {
+        if ( this == data ) {
+            return true;
+        }
+        final Sequence s = ( Sequence ) data;
+        if ( ( getAccession() != null ) && ( s.getAccession() != null ) ) {
+            return getAccession().isEqual( s.getAccession() );
+        }
+        return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() )
+                && s.getSymbol().equals( getSymbol() );
+    }
+
+    public void setAccession( final Accession accession ) {
+        _accession = accession;
+    }
+
+    private void setAnnotations( final SortedSet<Annotation> annotations ) {
+        _annotations = annotations;
+    }
+
+    public void setDomainArchitecture( final DomainArchitecture ds ) {
+        _da = ds;
+    }
+
+    public void setLocation( final String description ) {
+        _location = description;
+    }
+
+    public void setMolecularSequence( final String mol_sequence ) {
+        _mol_sequence = mol_sequence;
+    }
+
+    public void setMolecularSequenceAligned( final boolean aligned ) {
+        _mol_sequence_is_aligned = aligned;
+    }
+
+    public void setName( final String name ) {
+        _name = name;
+    }
+
+    public void setSourceId( final String source_id ) {
+        _source_id = source_id;
+    }
+
+    public void setSymbol( final String symbol ) {
+        if ( !ForesterUtil.isEmpty( symbol ) && !PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN.matcher( symbol ).matches() ) {
+            throw new PhyloXmlDataFormatException( "illegal sequence symbol: [" + symbol + "]" );
+        }
+        _symbol = symbol;
+    }
+
+    public void setType( final String type ) {
+        if ( !ForesterUtil.isEmpty( type ) && !PhyloXmlUtil.SEQUENCE_TYPES.contains( type ) ) {
+            throw new PhyloXmlDataFormatException( "illegal sequence type: [" + type + "]" );
+        }
+        _type = type;
+    }
+
+    public void setUris( final List<Uri> uris ) {
+        _uris = uris;
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        if ( getName().length() > 0 ) {
+            sb.append( ":" );
+            sb.append( NHXtags.GENE_NAME );
+            sb.append( ForesterUtil.replaceIllegalNhxCharacters( getName() ) );
+        }
+        if ( getAccession() != null ) {
+            sb.append( getAccession().toNHX() );
+        }
+        if ( getDomainArchitecture() != null ) {
+            sb.append( getDomainArchitecture().toNHX() );
+        }
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isEmpty() ) {
+            return;
+        }
+        final String my_ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE, PhyloXmlMapping.SEQUENCE_TYPE, getType() );
+        if ( !ForesterUtil.isEmpty( getSymbol() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_SYMBOL, getSymbol(), indentation );
+        }
+        if ( ( getAccession() != null ) && !ForesterUtil.isEmpty( getAccession().getValue() ) ) {
+            getAccession().toPhyloXML( writer, level, indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getName() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getLocation() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getMolecularSequence() ) ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.SEQUENCE_MOL_SEQ,
+                                             getMolecularSequence(),
+                                             PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR,
+                                             String.valueOf( isMolecularSequenceAligned() ),
+                                             indentation );
+        }
+        if ( getUris() != null ) {
+            for( final Uri uri : getUris() ) {
+                if ( uri != null ) {
+                    uri.toPhyloXML( writer, level, indentation );
+                }
+            }
+        }
+        if ( _annotations != null ) {
+            for( final PhylogenyData annotation : getAnnotations() ) {
+                annotation.toPhyloXML( writer, level, my_ind );
+            }
+        }
+        if ( getDomainArchitecture() != null ) {
+            getDomainArchitecture().toPhyloXML( writer, level, my_ind );
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+}
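
A small sketch illustrating the shallow-copy semantics documented on Sequence.copy() above; the sequence name is an arbitrary example.

    import org.forester.phylogeny.data.Sequence;

    // Hypothetical driver class, not part of this commit.
    public class SequenceCopyDemo {

        public static void main( final String[] args ) {
            final Sequence a = new Sequence();
            a.setName( "cytochrome c" );
            final Sequence b = ( Sequence ) a.copy();
            // copy() is not a deep copy: the annotation set is the same object in both instances.
            System.out.println( a.getAnnotations() == b.getAnnotations() ); // true
            // With no accessions set, isEqual() falls back to molecular sequence, name, and symbol.
            System.out.println( a.isEqual( b ) );                           // true
        }
    }
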
diff --git a/forester/java/src/org/forester/phylogeny/data/SequenceRelation.java b/forester/java/src/org/forester/phylogeny/data/SequenceRelation.java
new file mode 100644 (file)
index 0000000..de50d59
--- /dev/null
@@ -0,0 +1,149 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class SequenceRelation implements PhylogenyData {
+
+    //public final static Map<String, SEQUENCE_RELATION_TYPE> typesToNames = new LinkedHashMap<String, SEQUENCE_RELATION_TYPE>();
+    public final static Map<SEQUENCE_RELATION_TYPE, String> typesToNames                                = new LinkedHashMap<SEQUENCE_RELATION_TYPE, String>();
+    public final static String                              SEQUENCE_RELATION_TYPE_ORTHOLOGY            = "orthology";
+    public final static String                              SEQUENCE_RELATION_TYPE_ONE_TO_ONE_ORTHOLOGY = "one_to_one_orthology";
+    public final static String                              SEQUENCE_RELATION_TYPE_SUPER_ORTHOLOGY      = "super_orthology";
+    public final static String                              SEQUENCE_RELATION_TYPE_PARALOGY             = "paralogy";
+    public final static String                              SEQUENCE_RELATION_TYPE_ULTRA_PARALOGY       = "ultra_paralogy";
+    public final static String                              SEQUENCE_RELATION_TYPE_XENOLOGY             = "xenology";
+    public final static String                              SEQUENCE_RELATION_TYPE_UNKNOWN              = "unknown";
+    public final static String                              SEQUENCE_RELATION_TYPE_OTHER                = "other";
+    private Sequence                                        ref0;
+    private Sequence                                        ref1;
+    private SEQUENCE_RELATION_TYPE                          type;
+    private Double                                          distance;
+    private Confidence                                      confidence;
+    static {
+        typesToNames.put( SEQUENCE_RELATION_TYPE.orthology, SEQUENCE_RELATION_TYPE_ORTHOLOGY );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.one_to_one_orthology, SEQUENCE_RELATION_TYPE_ONE_TO_ONE_ORTHOLOGY );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.super_orthology, SEQUENCE_RELATION_TYPE_SUPER_ORTHOLOGY );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.paralogy, SEQUENCE_RELATION_TYPE_PARALOGY );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.ultra_paralogy, SEQUENCE_RELATION_TYPE_ULTRA_PARALOGY );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.xenology, SEQUENCE_RELATION_TYPE_XENOLOGY );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.unknown, SEQUENCE_RELATION_TYPE_UNKNOWN );
+        typesToNames.put( SEQUENCE_RELATION_TYPE.other, SEQUENCE_RELATION_TYPE_OTHER );
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+    @Override
+    public StringBuffer asText() {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+    public Confidence getConfidence() {
+        return confidence;
+    }
+
+    public Double getDistance() {
+        return distance;
+    }
+
+    public Sequence getRef0() {
+        return ref0;
+    }
+
+    public Sequence getRef1() {
+        return ref1;
+    }
+
+    public SEQUENCE_RELATION_TYPE getType() {
+        return type;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        // TODO Auto-generated method stub
+        return false;
+    }
+
+    public void setConfidence( final Confidence confidence ) {
+        this.confidence = confidence;
+    }
+
+    public void setDistance( final Double distance ) {
+        this.distance = distance;
+    }
+
+    public void setRef0( final Sequence ref0 ) {
+        this.ref0 = ref0;
+    }
+
+    public void setRef1( final Sequence ref1 ) {
+        this.ref1 = ref1;
+    }
+
+    public void setType( final SEQUENCE_RELATION_TYPE type ) {
+        this.type = type;
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        // TODO Auto-generated method stub
+    }
+
+    public static String getPrintableNameByType( final SEQUENCE_RELATION_TYPE type ) {
+        String s = typesToNames.get( type );
+        if ( s != null ) {
+            s = s.replace( '_', ' ' );
+            if ( ( s.length() > 15 ) && s.toLowerCase().endsWith( "ology" ) ) {
+                s = s.substring( 0, s.length() - 5 ) + ".";
+            }
+        }
+        return s;
+    }
+
+    public static enum SEQUENCE_RELATION_TYPE {
+        orthology, one_to_one_orthology, super_orthology, paralogy, ultra_paralogy, xenology, unknown, other;
+    }
+}
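
A quick sketch of the name-shortening rule in getPrintableNameByType() above: printable names longer than 15 characters that end in "ology" are truncated.

    import org.forester.phylogeny.data.SequenceRelation;
    import org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE;

    // Hypothetical driver class, not part of this commit.
    public class SequenceRelationDemo {

        public static void main( final String[] args ) {
            System.out.println( SequenceRelation
                    .getPrintableNameByType( SEQUENCE_RELATION_TYPE.orthology ) );            // orthology
            System.out.println( SequenceRelation
                    .getPrintableNameByType( SEQUENCE_RELATION_TYPE.one_to_one_orthology ) ); // one to one orth.
        }
    }
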
diff --git a/forester/java/src/org/forester/phylogeny/data/Taxonomy.java b/forester/java/src/org/forester/phylogeny/data/Taxonomy.java
new file mode 100644 (file)
index 0000000..026d68b
--- /dev/null
@@ -0,0 +1,394 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.nhx.NHXtags;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
+import org.forester.util.ForesterUtil;
+
+public class Taxonomy implements PhylogenyData, MultipleUris, Comparable<Taxonomy> {
+
+    private String       _scientific_name;
+    private String       _common_name;
+    private List<String> _synonyms;
+    private String       _authority;
+    private Identifier   _identifier;
+    private String       _taxonomy_code;
+    private String       _rank;
+    private List<Uri>    _uris;
+
+    public Taxonomy() {
+        init();
+    }
+
+    public StringBuffer asSimpleText() {
+        return asText();
+    }
+
+    public Uri getUri( final int index ) {
+        return getUris().get( index );
+    }
+
+    public void addUri( final Uri uri ) {
+        if ( getUris() == null ) {
+            setUris( new ArrayList<Uri>() );
+        }
+        getUris().add( uri );
+    }
+
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        if ( getIdentifier() != null ) {
+            sb.append( "[" );
+            sb.append( getIdentifier().asSimpleText() );
+            sb.append( "]" );
+        }
+        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
+            if ( sb.length() > 0 ) {
+                sb.append( " " );
+            }
+            sb.append( "[" );
+            sb.append( getTaxonomyCode() );
+            sb.append( "]" );
+        }
+        if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
+            if ( sb.length() > 0 ) {
+                sb.append( " " );
+            }
+            sb.append( getScientificName() );
+            if ( !ForesterUtil.isEmpty( getAuthority() ) ) {
+                sb.append( " (" );
+                sb.append( getAuthority() );
+                sb.append( ")" );
+            }
+        }
+        if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
+            if ( sb.length() > 0 ) {
+                sb.append( " " );
+            }
+            sb.append( getCommonName() );
+        }
+        return sb;
+    }
+
+    public PhylogenyData copy() {
+        final Taxonomy t = new Taxonomy();
+        t.setTaxonomyCode( getTaxonomyCode() );
+        t.setScientificName( getScientificName() );
+        t.setCommonName( getCommonName() );
+        t.setAuthority( getAuthority() );
+        for( final String syn : getSynonyms() ) {
+            t.getSynonyms().add( syn );
+        }
+        if ( getIdentifier() != null ) {
+            t.setIdentifier( ( Identifier ) getIdentifier().copy() );
+        }
+        else {
+            t.setIdentifier( null );
+        }
+        t.setRank( new String( getRank() ) );
+        if ( getUris() != null ) {
+            t.setUris( new ArrayList<Uri>() );
+            for( final Uri uri : getUris() ) {
+                if ( uri != null ) {
+                    t.getUris().add( uri );
+                }
+            }
+        }
+        return t;
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            return false;
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return isEqual( ( Taxonomy ) o );
+        }
+    }
+
+    public String getAuthority() {
+        return _authority;
+    }
+
+    public String getCommonName() {
+        return _common_name;
+    }
+
+    public Identifier getIdentifier() {
+        return _identifier;
+    }
+
+    public String getRank() {
+        return _rank;
+    }
+
+    public String getScientificName() {
+        return _scientific_name;
+    }
+
+    public List<String> getSynonyms() {
+        if ( _synonyms == null ) {
+            _synonyms = new ArrayList<String>();
+        }
+        return _synonyms;
+    }
+
+    public String getTaxonomyCode() {
+        return _taxonomy_code;
+    }
+
+    public List<Uri> getUris() {
+        return _uris;
+    }
+
+    @Override
+    public int hashCode() {
+        if ( getIdentifier() != null ) {
+            return getIdentifier().hashCode();
+        }
+        else if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
+            return getTaxonomyCode().hashCode();
+        }
+        else if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
+            if ( !ForesterUtil.isEmpty( getAuthority() ) ) {
+                return ( getScientificName().toLowerCase() + getAuthority().toLowerCase() ).hashCode();
+            }
+            return getScientificName().toLowerCase().hashCode();
+        }
+        else {
+            return getCommonName().toLowerCase().hashCode();
+        }
+    }
+
+    public void init() {
+        setScientificName( "" );
+        setCommonName( "" );
+        setIdentifier( null );
+        setRank( "" );
+        setTaxonomyCode( "" );
+        setAuthority( "" );
+        setSynonyms( null );
+        setUris( null );
+    }
+
+    public boolean isEmpty() {
+        return ( ( getIdentifier() == null ) && ForesterUtil.isEmpty( getTaxonomyCode() )
+                && ForesterUtil.isEmpty( getCommonName() ) && ForesterUtil.isEmpty( getScientificName() )
+                && ForesterUtil.isEmpty( getRank() ) && ForesterUtil.isEmpty( _uris )
+                && ForesterUtil.isEmpty( getAuthority() ) && ForesterUtil.isEmpty( _synonyms ) );
+    }
+
+    /**
+     * 
+     * If both this taxonomy and taxonomy 'data' have an identifier, comparison will be based on that.
+     * Otherwise, if both have a taxonomy code, comparison will be based on that.
+     * Otherwise, if both have a scientific name, comparison will be based on that (case insensitive!).
+     * Otherwise, if both have a common name, comparison will be based on that (case insensitive!).
+     * (Note: this is important and should not be changed without a very good reason.)
+     * 
+     */
+    public boolean isEqual( final PhylogenyData data ) {
+        if ( this == data ) {
+            return true;
+        }
+        final Taxonomy tax = ( Taxonomy ) data;
+        if ( ( getIdentifier() != null ) && ( tax.getIdentifier() != null ) ) {
+            return getIdentifier().isEqual( tax.getIdentifier() );
+        }
+        else if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) && !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+            return getTaxonomyCode().equals( tax.getTaxonomyCode() );
+        }
+        else if ( !ForesterUtil.isEmpty( getScientificName() ) && !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+            if ( !ForesterUtil.isEmpty( getAuthority() ) && !ForesterUtil.isEmpty( tax.getAuthority() ) ) {
+                return ( getScientificName().equalsIgnoreCase( tax.getScientificName() ) )
+                        && ( getAuthority().equalsIgnoreCase( tax.getAuthority() ) );
+            }
+            return getScientificName().equalsIgnoreCase( tax.getScientificName() );
+        }
+        else if ( !ForesterUtil.isEmpty( getCommonName() ) && !ForesterUtil.isEmpty( tax.getCommonName() ) ) {
+            return getCommonName().equalsIgnoreCase( tax.getCommonName() );
+        }
+        else if ( !ForesterUtil.isEmpty( getScientificName() ) && !ForesterUtil.isEmpty( tax.getCommonName() ) ) {
+            return getScientificName().equalsIgnoreCase( tax.getCommonName() );
+        }
+        else if ( !ForesterUtil.isEmpty( getCommonName() ) && !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+            return getCommonName().equalsIgnoreCase( tax.getScientificName() );
+        }
+        throw new RuntimeException( "comparison not possible with empty fields" );
+    }
+
+    public void setAuthority( final String authority ) {
+        _authority = authority;
+    }
+
+    public void setCommonName( final String common_name ) {
+        _common_name = common_name;
+    }
+
+    public void setIdentifier( final Identifier identifier ) {
+        _identifier = identifier;
+    }
+
+    public void setRank( final String rank ) {
+        if ( !ForesterUtil.isEmpty( rank ) && !PhyloXmlUtil.TAXONOMY_RANKS.contains( rank ) ) {
+            throw new PhyloXmlDataFormatException( "illegal rank: [" + rank + "]" );
+        }
+        _rank = rank;
+    }
+
+    public void setScientificName( final String scientific_name ) {
+        _scientific_name = scientific_name;
+    }
+
+    private void setSynonyms( final List<String> synonyms ) {
+        _synonyms = synonyms;
+    }
+
+    public void setTaxonomyCode( final String taxonomy_code ) {
+        if ( !ForesterUtil.isEmpty( taxonomy_code )
+                && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( taxonomy_code ).matches() ) {
+            throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
+        }
+        _taxonomy_code = taxonomy_code;
+    }
+
+    public void setUris( final List<Uri> uris ) {
+        _uris = uris;
+    }
+
+    public StringBuffer toNHX() {
+        final StringBuffer sb = new StringBuffer();
+        if ( getIdentifier() != null ) {
+            sb.append( ':' + NHXtags.TAXONOMY_ID );
+            sb.append( ForesterUtil.replaceIllegalNhxCharacters( getIdentifier().getValue() ) );
+        }
+        final StringBuffer species = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
+            species.append( ForesterUtil.replaceIllegalNhxCharacters( getTaxonomyCode() ) );
+        }
+        if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
+            ForesterUtil.appendSeparatorIfNotEmpty( species, '|' );
+            species.append( ForesterUtil.replaceIllegalNhxCharacters( getScientificName() ) );
+        }
+        if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
+            ForesterUtil.appendSeparatorIfNotEmpty( species, '|' );
+            species.append( ForesterUtil.replaceIllegalNhxCharacters( getCommonName() ) );
+        }
+        if ( species.length() > 0 ) {
+            sb.append( ':' + NHXtags.SPECIES_NAME );
+            sb.append( species );
+        }
+        return sb;
+    }
+
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        if ( isEmpty() ) {
+            return;
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.TAXONOMY );
+        if ( ( getIdentifier() != null ) && !ForesterUtil.isEmpty( getIdentifier().getValue() ) ) {
+            getIdentifier().toPhyloXML( writer, level, indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_CODE, getTaxonomyCode(), indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getScientificName() ) ) {
+            PhylogenyDataUtil.appendElement( writer,
+                                             PhyloXmlMapping.TAXONOMY_SCIENTIFIC_NAME,
+                                             getScientificName(),
+                                             indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getAuthority() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_AUTHORITY, getAuthority(), indentation );
+        }
+        if ( !ForesterUtil.isEmpty( getCommonName() ) ) {
+            PhylogenyDataUtil
+                    .appendElement( writer, PhyloXmlMapping.TAXONOMY_COMMON_NAME, getCommonName(), indentation );
+        }
+        if ( _synonyms != null ) {
+            for( final String syn : getSynonyms() ) {
+                if ( !ForesterUtil.isEmpty( syn ) ) {
+                    PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_SYNONYM, syn, indentation );
+                }
+            }
+        }
+        if ( !ForesterUtil.isEmpty( getRank() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.TAXONOMY_RANK, getRank(), indentation );
+        }
+        if ( getUris() != null ) {
+            for( final Uri uri : getUris() ) {
+                if ( uri != null ) {
+                    uri.toPhyloXML( writer, level, indentation );
+                }
+            }
+        }
+        writer.write( ForesterUtil.LINE_SEPARATOR );
+        writer.write( indentation );
+        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.TAXONOMY );
+    }
+
+    @Override
+    public String toString() {
+        return asText().toString();
+    }
+
+    @Override
+    public int compareTo( final Taxonomy o ) {
+        if ( equals( o ) ) {
+            return 0;
+        }
+        else if ( !ForesterUtil.isEmpty( getScientificName() ) && !ForesterUtil.isEmpty( o.getScientificName() ) ) {
+            return getScientificName().compareToIgnoreCase( o.getScientificName() );
+        }
+        else if ( !ForesterUtil.isEmpty( getCommonName() ) && !ForesterUtil.isEmpty( o.getCommonName() ) ) {
+            return getCommonName().compareToIgnoreCase( o.getCommonName() );
+        }
+        else if ( !ForesterUtil.isEmpty( getTaxonomyCode() ) && !ForesterUtil.isEmpty( o.getTaxonomyCode() ) ) {
+            return getTaxonomyCode().compareToIgnoreCase( o.getTaxonomyCode() );
+        }
+        return 0;
+    }
+}
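
A short sketch of the comparison priority documented on Taxonomy.isEqual() above (identifier, then taxonomy code, then case-insensitive scientific name); the species name is an arbitrary example.

    import org.forester.phylogeny.data.Taxonomy;

    // Hypothetical driver class, not part of this commit.
    public class TaxonomyDemo {

        public static void main( final String[] args ) {
            final Taxonomy a = new Taxonomy();
            a.setScientificName( "Mus musculus" );
            final Taxonomy b = new Taxonomy();
            b.setScientificName( "MUS MUSCULUS" );
            // No identifiers or taxonomy codes are set, so the comparison falls back
            // to the case-insensitive scientific name.
            System.out.println( a.isEqual( b ) );   // true
            System.out.println( a.compareTo( b ) ); // 0
        }
    }
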
diff --git a/forester/java/src/org/forester/phylogeny/data/Uri.java b/forester/java/src/org/forester/phylogeny/data/Uri.java
new file mode 100644 (file)
index 0000000..06559fa
--- /dev/null
@@ -0,0 +1,127 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.data;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.net.URI;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
+
+public class Uri implements PhylogenyData {
+
+    final private URI    _uri;
+    final private String _description;
+    final private String _type;
+
+    public Uri( final String uri_str, final String description, final String type ) {
+        if ( uri_str == null ) {
+            throw new IllegalArgumentException( "attempt to create Uri from null" );
+        }
+        _uri = URI.create( uri_str );
+        _description = description;
+        _type = type;
+    }
+
+    public Uri( final URI uri ) {
+        if ( uri == null ) {
+            throw new IllegalArgumentException( "attempt to create Uri from null URI" );
+        }
+        _uri = uri;
+        _description = "";
+        _type = "";
+    }
+
+    public Uri( final URI uri, final String description, final String type ) {
+        if ( uri == null ) {
+            throw new IllegalArgumentException( "attempt to create Uri from null URI" );
+        }
+        _uri = uri;
+        _description = description;
+        _type = type;
+    }
+
+    @Override
+    public StringBuffer asSimpleText() {
+        return new StringBuffer( getValue().toString() );
+    }
+
+    @Override
+    public StringBuffer asText() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "[" );
+        sb.append( getDescription() );
+        sb.append( " " );
+        sb.append( getType() );
+        sb.append( "] " );
+        sb.append( getValue().toString() );
+        return sb;
+    }
+
+    @Override
+    public PhylogenyData copy() {
+        return new Uri( getValue().toString(), getDescription(), getType() );
+    }
+
+    public String getDescription() {
+        return _description;
+    }
+
+    public String getType() {
+        return _type;
+    }
+
+    public URI getValue() {
+        return _uri;
+    }
+
+    @Override
+    public boolean isEqual( final PhylogenyData data ) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public StringBuffer toNHX() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
+        PhylogenyDataUtil.appendElement( writer,
+                                         PhyloXmlMapping.URI,
+                                         getValue().toString(),
+                                         PhyloXmlMapping.TYPE_ATTR,
+                                         getType(),
+                                         PhyloXmlMapping.URI_DESC_ATTR,
+                                         getDescription(),
+                                         indentation );
+    }
+
+    @Override
+    public String toString() {
+        return asSimpleText().toString();
+    }
+}
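+// Illustrative usage sketch (not part of the original file; the literal values
+// below are made up for demonstration):
+//
+//   final Uri uri = new Uri( "http://www.phylosoft.org/forester", "forester home", "text/html" );
+//   System.out.println( uri.asText() );
+//   // prints: [forester home text/html] http://www.phylosoft.org/forester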
diff --git a/forester/java/src/org/forester/phylogeny/factories/BasicPhylogenyFactory.java b/forester/java/src/org/forester/phylogeny/factories/BasicPhylogenyFactory.java
new file mode 100644 (file)
index 0000000..0d37507
--- /dev/null
@@ -0,0 +1,46 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.factories;
+
+import java.io.IOException;
+
+import org.forester.phylogeny.Phylogeny;
+
+/*
+ * Convenience class for PhylogenyFactories that do not use parameters.
+ * 
+ * @author Christian M. Zmasek
+ */
+public abstract class BasicPhylogenyFactory implements PhylogenyFactory {
+
+    public Phylogeny create() {
+        return new Phylogeny();
+    }
+
+    public Phylogeny[] create( final Object source, final Object creator ) throws IOException {
+        return create( source, creator, null );
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/factories/ParserBasedPhylogenyFactory.java b/forester/java/src/org/forester/phylogeny/factories/ParserBasedPhylogenyFactory.java
new file mode 100644 (file)
index 0000000..eab4efa
--- /dev/null
@@ -0,0 +1,89 @@
+// $Id:
+// $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.factories;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.util.ForesterUtil;
+
+public class ParserBasedPhylogenyFactory extends BasicPhylogenyFactory {
+
+    private final static PhylogenyFactory _instance;
+    static {
+        try {
+            _instance = new ParserBasedPhylogenyFactory();
+        }
+        catch ( final Throwable e ) {
+            throw new RuntimeException( e.getMessage() );
+        }
+    }
+
+    private ParserBasedPhylogenyFactory() {
+        // Private constructor.
+    }
+
+    @Override
+    public Object clone() throws CloneNotSupportedException {
+        throw new CloneNotSupportedException();
+    }
+
+    public synchronized Phylogeny[] create( final Object source, final Object parser, final List<Object> parameters )
+            throws IOException {
+        if ( !( parser instanceof PhylogenyParser ) ) {
+            throw new IllegalArgumentException( "attempt to use object of type other than PhylogenyParser as creator for ParserBasedPhylogenyFactory" );
+        }
+        final PhylogenyParser my_parser = ( PhylogenyParser ) parser;
+        my_parser.setSource( source );
+        return my_parser.parse();
+    }
+
+    public synchronized Phylogeny[] create( final Object source,
+                                            final Object parser,
+                                            final String schema_location,
+                                            final List<Object> parameters ) throws IOException {
+        if ( !( parser instanceof PhylogenyParser ) ) {
+            throw new IllegalArgumentException( "attempt to use object of type other than PhylogenyParser as creator for ParserBasedPhylogenyFactory." );
+        }
+        if ( !( parser instanceof PhyloXmlParser ) ) {
+            throw new IllegalArgumentException( "attempt to use schema location with other than phyloXML parser" );
+        }
+        final PhyloXmlParser xml_parser = ( PhyloXmlParser ) parser;
+        if ( !ForesterUtil.isEmpty( schema_location ) ) {
+            xml_parser.setValidateAgainstSchema( schema_location );
+        }
+        xml_parser.setSource( source );
+        return xml_parser.parse();
+    }
+
+    public static PhylogenyFactory getInstance() {
+        return _instance;
+    }
+}
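+// Illustrative usage sketch (not part of the original file). It assumes a
+// java.io.File "tree_file" in one of the supported formats; the parser is
+// chosen via ForesterUtil, as in DistanceCalculator.main() elsewhere in this
+// commit:
+//
+//   final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+//   final PhylogenyParser parser = ForesterUtil.createParserDependingOnFileType( tree_file, true );
+//   final Phylogeny[] phylogenies = factory.create( tree_file, parser );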
diff --git a/forester/java/src/org/forester/phylogeny/factories/PhylogenyFactory.java b/forester/java/src/org/forester/phylogeny/factories/PhylogenyFactory.java
new file mode 100644 (file)
index 0000000..ce90f03
--- /dev/null
@@ -0,0 +1,77 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.factories;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.forester.phylogeny.Phylogeny;
+
+/*
+ * Interface for Phylogeny factories.
+ * 
+ * @author Christian M. Zmasek
+ */
+public interface PhylogenyFactory {
+
+    /**
+     * This must be implemented in such a way that it returns an empty
+     * Phylogeny.
+     * 
+     * @return an empty Phylogeny
+     */
+    public Phylogeny create();
+
+    /**
+     * This must create a Phylogeny from source (e.g. an XML file, an alignment,
+     * pairwise distances) by using creator (e.g. an XML file parser, an
+     * algorithm implementation).
+     * 
+     * @param source
+     *            a source to create a Phylogeny from
+     * @param creator
+     *            a means to create a Phylogeny
+     * @return a Phylogeny[] based on argument source
+     * @throws IOException
+     */
+    public Phylogeny[] create( Object source, Object creator ) throws IOException;
+
+    /**
+     * This must create a Phylogeny from source (e.g. an XML file, an alignment,
+     * pairwise distances) by using creator (e.g. an XML file parser, an
+     * algorithm implementation) with parameters listed in parameters.
+     * 
+     * @param source
+     *            a source to create a Phylogeny from
+     * @param creator
+     *            a means to create a Phylogeny
+     * @param parameters
+     *            a List of parameters for Phylogeny creation
+     * @return a Phylogeny[] based on argument source
+     * @throws IOException
+     */
+    public Phylogeny[] create( Object source, Object creator, List<Object> parameters ) throws IOException;
+} // PhylogenyFactory
diff --git a/forester/java/src/org/forester/phylogeny/iterators/ChildNodeIteratorForward.java b/forester/java/src/org/forester/phylogeny/iterators/ChildNodeIteratorForward.java
new file mode 100644 (file)
index 0000000..98bbf59
--- /dev/null
@@ -0,0 +1,141 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import java.util.NoSuchElementException;
+
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * An iterator to forward iterate over child nodes of a PhylogenyNode. Created:
+ * 10/23/2005 by Christian M. Zmasek. Last modified: 12/28/2006 by Christian M.
+ * Zmasek.
+ * 
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.000
+ */
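+// Illustrative usage sketch (not part of the original file): "parent" is
+// assumed to be a PhylogenyNode with at least one descendant.
+//
+//   final PhylogenyNodeIterator it = new ChildNodeIteratorForward( parent );
+//   while ( it.hasNext() ) {
+//       final PhylogenyNode child = it.next();
+//       // ... process each direct child of "parent" ...
+//   }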
+public class ChildNodeIteratorForward implements PhylogenyNodeIterator {
+
+    // Instance variables
+    // ------------------
+    private int                 _i;
+    final private PhylogenyNode _node;
+
+    // Constructor
+    // -----------
+    /**
+     * Creates a new ChildNodeIteratorForward.
+     * 
+     * @param node
+     *            the parent of the PhylogenyNodes to iterate over.
+     * @throws IllegalArgumentException
+     *             if node has no child nodes
+     */
+    public ChildNodeIteratorForward( final PhylogenyNode node ) throws IllegalArgumentException {
+        if ( node.getNumberOfDescendants() < 1 ) {
+            throw new IllegalArgumentException( "Attempt to use ChildNodeIteratorForward on node with no child nodes." );
+        }
+        _node = node;
+        reset();
+    }
+
+    // Private methods
+    // ---------------
+    /**
+     * Returns the counter.
+     */
+    private int getI() {
+        return _i;
+    }
+
+    /**
+     * Returns the parent of the nodes to iterate over.
+     * 
+     * @return the parent of the nodes to iterate over.
+     */
+    private PhylogenyNode getNode() {
+        return _node;
+    }
+
+    // Public methods
+    // --------------
+    /**
+     * Returns true if this iterator has at least one more element, false
+     * otherwise.
+     * 
+     * @return true if this iterator has at least one more element, false
+     *         otherwise
+     */
+    public boolean hasNext() {
+        return ( getI() < getNode().getNumberOfDescendants() );
+    }
+
+    /**
+     * Increases the counter by one.
+     */
+    private void increaseI() {
+        ++_i;
+    }
+
+    /**
+     * Returns the next PhylogenyNode.
+     * 
+     * @return the next PhylogenyNode
+     * @throws NoSuchElementException
+     *             if iteration is complete
+     */
+    public PhylogenyNode next() throws NoSuchElementException {
+        if ( !hasNext() ) {
+            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
+        }
+        final PhylogenyNode n = getNode().getChildNode( getI() );
+        increaseI();
+        return n;
+    }
+
+    /**
+     * Not supported.
+     * 
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Resets the iterator.
+     */
+    public void reset() {
+        setI( 0 );
+    }
+
+    /**
+     * Sets the counter.
+     */
+    private void setI( final int i ) {
+        _i = i;
+    }
+} // end of class ChildNodeIteratorForward.
diff --git a/forester/java/src/org/forester/phylogeny/iterators/ExternalForwardIterator.java b/forester/java/src/org/forester/phylogeny/iterators/ExternalForwardIterator.java
new file mode 100644 (file)
index 0000000..f75e6f7
--- /dev/null
@@ -0,0 +1,119 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import java.util.NoSuchElementException;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * @author Christian Zmasek
+ */
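+// Illustrative usage sketch (not part of the original file): "phy" is assumed
+// to be a non-empty Phylogeny; the loop visits every external (tip) node once.
+//
+//   for( final PhylogenyNodeIterator it = new ExternalForwardIterator( phy ); it.hasNext(); ) {
+//       final PhylogenyNode tip = it.next();
+//       // ... process tip ...
+//   }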
+public class ExternalForwardIterator implements PhylogenyNodeIterator {
+
+    private PhylogenyNode       _current_node;
+    private final PhylogenyNode _last_ext_node;
+    private final PhylogenyNode _first_ext_node;
+
+    /**
+     * Constructor for ExternalForwardIterator.
+     * 
+     * @param phylogeny
+     *            the phylogeny over whose external nodes to iterate.
+     */
+    public ExternalForwardIterator( final Phylogeny phylogeny ) throws IllegalArgumentException {
+        if ( phylogeny.isEmpty() ) {
+            throw new IllegalArgumentException( "Attempt to use ExternalForwardIterator on an empty phylogeny." );
+        }
+        PhylogenyNode n = phylogeny.getRoot();
+        while ( !n.isExternal() ) {
+            n = n.getLastChildNode();
+        }
+        _last_ext_node = n;
+        _first_ext_node = phylogeny.getFirstExternalNode();
+        reset();
+    }
+
+    private PhylogenyNode getCurrentNode() {
+        return _current_node;
+    }
+
+    private PhylogenyNode getFirstExtNode() {
+        return _first_ext_node;
+    }
+
+    private PhylogenyNode getLastExtNode() {
+        return _last_ext_node;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.util.Iterator#hasNext()
+     */
+    public boolean hasNext() {
+        return getCurrentNode() != null;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.util.Iterator#next()
+     */
+    public PhylogenyNode next() throws NoSuchElementException {
+        if ( !hasNext() ) {
+            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
+        }
+        final PhylogenyNode n = getCurrentNode();
+        if ( n == getLastExtNode() ) {
+            setCurrentNode( null );
+        }
+        else {
+            setCurrentNode( n.getNextExternalNode() );
+        }
+        return n;
+    }
+
+    /**
+     * Not supported.
+     * 
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Resets the iterator to the first external node of the phylogeny.
+     */
+    public void reset() {
+        setCurrentNode( getFirstExtNode() );
+    }
+
+    private void setCurrentNode( final PhylogenyNode current_node ) {
+        _current_node = current_node;
+    }
+} // end of class ExternalForwardIterator
diff --git a/forester/java/src/org/forester/phylogeny/iterators/LevelOrderTreeIterator.java b/forester/java/src/org/forester/phylogeny/iterators/LevelOrderTreeIterator.java
new file mode 100644 (file)
index 0000000..1b1450f
--- /dev/null
@@ -0,0 +1,147 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import java.util.NoSuchElementException;
+
+import org.forester.datastructures.Queue;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * An iterator to iterate a Phylogeny in level order.
+ * 
+ * Created: 10/23/2005 by Christian M. Zmasek. Last modified: 10/23/2005 by
+ * Christian M. Zmasek.
+ * 
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.000
+ */
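+// Illustrative usage sketch (not part of the original file): a breadth-first
+// walk over a whole (non-empty) Phylogeny "phy"; nodes closer to the root are
+// returned before nodes further away from it.
+//
+//   final PhylogenyNodeIterator it = new LevelOrderTreeIterator( phy );
+//   while ( it.hasNext() ) {
+//       final PhylogenyNode node = it.next();
+//       // ... process node ...
+//   }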
+public class LevelOrderTreeIterator implements PhylogenyNodeIterator {
+
+    // Instance variables
+    // ------------------
+    private final Queue         _queue;
+    private final PhylogenyNode _root;
+
+    // Constructors
+    // ------------
+    /**
+     * Creates a new LevelOrderTreeIterator for iterating over all the nodes of
+     * Phylogeny phylogeny
+     * 
+     * @param phylogeny
+     *            the Phylogeny to iterate over
+     * @throws IllegalArgumentException
+     *             if phylogeny is empty
+     */
+    public LevelOrderTreeIterator( final Phylogeny phylogeny ) throws IllegalArgumentException {
+        this( phylogeny.getRoot() );
+        if ( phylogeny.isEmpty() ) {
+            throw new IllegalArgumentException( "Attempt to use LevelOrderTreeIterator on an empty phylogeny." );
+        }
+    }
+
+    /**
+     * Creates a new LevelOrderTreeIterator for iterating over all the child
+     * nodes of PhylogenyNode node (including node itself).
+     * 
+     * @param node
+     *            the parent of the nodes to iterate over
+     */
+    public LevelOrderTreeIterator( final PhylogenyNode node ) {
+        _queue = new Queue();
+        _root = node;
+        reset();
+    }
+
+    // Private methods
+    // ---------------
+    /**
+     * Returns the queue upon which this iterator is based.
+     * 
+     */
+    private Queue getQueue() {
+        return _queue;
+    }
+
+    /**
+     * Returns the root of the phylogeny (or subtree) this iterator traverses.
+     * 
+     * @return the root of the phylogeny (or subtree) this iterator traverses.
+     */
+    private PhylogenyNode getRoot() {
+        return _root;
+    }
+
+    // Public methods
+    // --------------
+    /**
+     * Returns true if this iterator has at least one more element, false
+     * otherwise.
+     * 
+     * @return true if this iterator has at least one more element, false
+     *         otherwise
+     */
+    public boolean hasNext() {
+        return !getQueue().isEmpty();
+    }
+
+    /**
+     * Returns the next PhylogenyNode.
+     * 
+     * @return the next PhylogenyNode
+     * @throws NoSuchElementException
+     *             if iteration is complete
+     */
+    public PhylogenyNode next() throws NoSuchElementException {
+        if ( !hasNext() ) {
+            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
+        }
+        final PhylogenyNode node = ( PhylogenyNode ) getQueue().dequeue();
+        for( int i = 0; i < node.getNumberOfDescendants(); ++i ) {
+            getQueue().enqueue( node.getChildNode( i ) );
+        }
+        return node;
+    }
+
+    /**
+     * Not supported.
+     * 
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Resets the iterator.
+     */
+    public void reset() {
+        getQueue().clear();
+        getQueue().enqueue( getRoot() );
+    }
+} // end of class LevelOrderTreeIterator
diff --git a/forester/java/src/org/forester/phylogeny/iterators/PhylogenyNodeIterator.java b/forester/java/src/org/forester/phylogeny/iterators/PhylogenyNodeIterator.java
new file mode 100644 (file)
index 0000000..587ab51
--- /dev/null
@@ -0,0 +1,46 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * An iterator over PhylogenyNodes which, in addition to the operations of
+ * java.util.Iterator, can be rewound to its starting position with reset().
+ * 
+ * @author Christian Zmasek
+ */
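+// Illustrative usage sketch (not part of the original file): unlike a plain
+// java.util.Iterator, implementations of this interface can be rewound with
+// reset() and traversed again. "phy" is assumed to be a non-empty Phylogeny.
+//
+//   final PhylogenyNodeIterator it = new PreorderTreeIterator( phy );
+//   while ( it.hasNext() ) {
+//       it.next(); // first pass
+//   }
+//   it.reset();
+//   while ( it.hasNext() ) {
+//       it.next(); // second pass over the same nodes
+//   }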
+public interface PhylogenyNodeIterator extends Iterator<PhylogenyNode> {
+
+    public boolean hasNext();
+
+    public PhylogenyNode next() throws NoSuchElementException;
+
+    public void reset();
+}
diff --git a/forester/java/src/org/forester/phylogeny/iterators/PostOrderStackObject.java b/forester/java/src/org/forester/phylogeny/iterators/PostOrderStackObject.java
new file mode 100644 (file)
index 0000000..9f06e50
--- /dev/null
@@ -0,0 +1,70 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.00 -- last modified: 06/15/00
+ */
+public class PostOrderStackObject {
+
+    final private PhylogenyNode _node;
+    final private int           _phase;
+
+    /**
+     * Creates a new PostOrderStackObject object.
+     * 
+     * @param n
+     *            the PhylogenyNode to store
+     * @param i
+     *            the traversal phase at which the node was pushed (the 1-based
+     *            index of the next child node to visit)
+     */
+    public PostOrderStackObject( final PhylogenyNode n, final int i ) {
+        _node = n;
+        _phase = i;
+    }
+
+    /**
+     * Returns the PhylogenyNode stored in this stack object.
+     * 
+     * @return the stored PhylogenyNode
+     */
+    public PhylogenyNode getNode() {
+        return _node;
+    }
+
+    /**
+     * Returns the traversal phase stored in this stack object.
+     * 
+     * @return the stored phase
+     */
+    public int getPhase() {
+        return _phase;
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/iterators/PostorderTreeIterator.java b/forester/java/src/org/forester/phylogeny/iterators/PostorderTreeIterator.java
new file mode 100644 (file)
index 0000000..1405476
--- /dev/null
@@ -0,0 +1,128 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import java.util.NoSuchElementException;
+import java.util.Stack;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+
+/*
+ * An iterator to iterate a Phylogeny in postorder (children before parents).
+ */
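+// Illustrative usage sketch (not part of the original file): "phy" is assumed
+// to be a non-empty Phylogeny; every descendant is returned before its parent,
+// so the root is returned last.
+//
+//   for( final PhylogenyNodeIterator it = new PostorderTreeIterator( phy ); it.hasNext(); ) {
+//       final PhylogenyNode node = it.next();
+//       // ... the children of "node" have already been visited here ...
+//   }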
+public class PostorderTreeIterator implements PhylogenyNodeIterator {
+
+    final private Phylogeny                   _tree;
+    final private PhylogenyNode               _root;
+    private boolean                           _has_next;
+    final private Stack<PostOrderStackObject> _stack;
+
+    /**
+     * @param tree
+     *            Phylogeny for which an Iterator is to be constructed.
+     */
+    public PostorderTreeIterator( final Phylogeny tree ) throws IllegalArgumentException {
+        if ( tree.isEmpty() ) {
+            throw new IllegalArgumentException( "Attempt to use PostorderTreeIterator on an empty phylogeny." );
+        }
+        _tree = tree;
+        _root = getTree().getRoot();
+        _stack = new Stack<PostOrderStackObject>();
+        reset();
+    }
+
+    private PhylogenyNode getRoot() {
+        return _root;
+    }
+
+    private Stack<PostOrderStackObject> getStack() {
+        return _stack;
+    }
+
+    private Phylogeny getTree() {
+        return _tree;
+    }
+
+    /**
+     * Returns true if this iterator has at least one more element, false
+     * otherwise.
+     * 
+     * @return true if this iterator has at least one more element, false
+     *         otherwise
+     */
+    public boolean hasNext() {
+        return _has_next;
+    }
+
+    /**
+     * Advances the Iterator by one.
+     */
+    public PhylogenyNode next() throws NoSuchElementException {
+        if ( !hasNext() ) {
+            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
+        }
+        while ( true ) {
+            final PostOrderStackObject si = getStack().pop();
+            final PhylogenyNode node = si.getNode();
+            final int phase = si.getPhase();
+            // if ( node != null ) {
+            if ( phase > node.getNumberOfDescendants() ) {
+                setHasNext( node != getRoot() );
+                return node;
+            }
+            else {
+                getStack().push( new PostOrderStackObject( node, ( phase + 1 ) ) );
+                if ( node.isInternal() ) {
+                    getStack().push( new PostOrderStackObject( node.getChildNode( phase - 1 ), 1 ) );
+                }
+                // else {
+                // getStack().push( new PostOrderStackObject( null, 1 ) );
+                // }
+            }
+            // }
+        }
+    }
+
+    /**
+     * Not supported.
+     * 
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Resets the iterator to the root of the phylogeny.
+     */
+    public void reset() {
+        setHasNext( true );
+        getStack().clear();
+        getStack().push( new PostOrderStackObject( getTree().getRoot(), 1 ) );
+    }
+
+    private void setHasNext( final boolean has_next ) {
+        _has_next = has_next;
+    }
+} // End of class PostorderTreeIterator.
diff --git a/forester/java/src/org/forester/phylogeny/iterators/PreorderTreeIterator.java b/forester/java/src/org/forester/phylogeny/iterators/PreorderTreeIterator.java
new file mode 100644 (file)
index 0000000..18928db
--- /dev/null
@@ -0,0 +1,115 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.phylogeny.iterators;
+
+import java.util.NoSuchElementException;
+import java.util.Stack;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+
+// import java.util.Iterator; TODO should implement this, not some iterator of
+// this package.
+/*
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.020 -- last modified: 10/10/05
+ */
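+// Illustrative usage sketch (not part of the original file): the PhylogenyNode
+// constructor restricts the traversal to the subtree rooted at "node", which
+// is returned first, before its descendants.
+//
+//   final PhylogenyNodeIterator it = new PreorderTreeIterator( node );
+//   while ( it.hasNext() ) {
+//       final PhylogenyNode n = it.next();
+//       // ... process n ...
+//   }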
+public class PreorderTreeIterator implements PhylogenyNodeIterator {
+
+    final private Phylogeny            _tree;
+    final private Stack<PhylogenyNode> _stack;
+
+    /**
+     * @param tree
+     *            Phylogeny for which an Iterator is to be constructed.
+     */
+    public PreorderTreeIterator( final Phylogeny tree ) throws IllegalArgumentException {
+        if ( tree.isEmpty() ) {
+            throw new IllegalArgumentException( "Attempt to use PreorderTreeIterator on empty tree." );
+        }
+        _stack = new Stack<PhylogenyNode>();
+        _tree = tree;
+        reset();
+    }
+
+    public PreorderTreeIterator( final PhylogenyNode node ) throws IllegalArgumentException {
+        _stack = new Stack<PhylogenyNode>();
+        _tree = null;
+        reset( node );
+    }
+
+    private Stack<PhylogenyNode> getStack() {
+        return _stack;
+    }
+
+    private Phylogeny getTree() {
+        return _tree;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.util.Iterator#hasNext()
+     */
+    public boolean hasNext() {
+        return !getStack().isEmpty();
+    }
+
+    /**
+     * Advances the Iterator by one.
+     */
+    public PhylogenyNode next() throws NoSuchElementException {
+        if ( !hasNext() ) {
+            throw new NoSuchElementException( "Attempt to call \"next()\" on iterator which has no more next elements." );
+        }
+        final PhylogenyNode node = getStack().pop();
+        if ( !node.isExternal() ) {
+            for( int i = node.getNumberOfDescendants() - 1; i >= 0; --i ) {
+                getStack().push( node.getChildNode( i ) );
+            }
+        }
+        return node;
+    } // next()
+
+    /**
+     * Not supported.
+     * 
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    public void reset() {
+        getStack().clear();
+        getStack().push( getTree().getRoot() );
+    }
+
+    private void reset( final PhylogenyNode node ) {
+        getStack().clear();
+        getStack().push( node );
+    }
+} // End of class PreorderTreeIterator.
diff --git a/forester/java/src/org/forester/sdi/DistanceCalculator.java b/forester/java/src/org/forester/sdi/DistanceCalculator.java
new file mode 100644 (file)
index 0000000..51b4201
--- /dev/null
@@ -0,0 +1,500 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.ListIterator;
+import java.util.Vector;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.util.ForesterUtil;
+
+/*
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.001 -- last modified: 12/04/00
+ */
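+// Illustrative usage sketch (not part of the original file; see also the test
+// main() at the bottom of this class). "phy" is assumed to be a rooted
+// Phylogeny and "ext_nodes" a Vector<PhylogenyNode> of some of its external
+// nodes:
+//
+//   final DistanceCalculator dc = new DistanceCalculator( phy, ext_nodes );
+//   final double mean = dc.getMean();               // mean distance to the LCA of ext_nodes
+//   final double sd   = dc.getStandardDeviation();  // and its standard deviation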
+public class DistanceCalculator {
+
+    public final static double       DEFAULT = -1.0;
+    private Phylogeny                tree_;
+    private ArrayList<PhylogenyNode> nodes_;
+    private int                      n_;
+    private double                   mean_, variance_, stand_dev_;
+    private PhylogenyNode            lca_;                        // The LCA of the Nodes in nodes_
+
+    /**
+     * Default constructor. (Last modified: 11/30/00)
+     */
+    public DistanceCalculator() {
+        tree_ = null;
+        nodes_ = null;
+        n_ = 0;
+        mean_ = DistanceCalculator.DEFAULT;
+        variance_ = DistanceCalculator.DEFAULT;
+        stand_dev_ = DistanceCalculator.DEFAULT;
+        lca_ = null;
+    }
+
+    /**
+     * Constructor. Sets the rooted Phylogeny t for which the mean distance to
+     * the root and its variance and standard deviation are calculated. (Last
+     * modified: 12/01/00)
+     * 
+     * @param t
+     *            the rooted Phylogeny for which the mean distance to the root
+     *            and its variance and standard deviation are calculated
+     */
+    public DistanceCalculator( final Phylogeny t ) {
+        setTree( t );
+    }
+
+    /**
+     * Constructor. Sets the rooted Phylogeny t and the external Nodes ext_nodes
+     * for which the mean distance to their lowest common ancestor and its
+     * variance and standard deviation are calculated. (Last modified: 12/01/00)
+     * 
+     * @param t
+     *            the rooted Phylogeny containing Nodes in Vector ext_nodes
+     * @param ext_nodes
+     *            a Vector of Nodes of t, the mean distance to their lowest
+     *            common ancestor and its variance and standard deviation are
+     *            calculated
+     */
+    public DistanceCalculator( final Phylogeny t, final Vector<PhylogenyNode> ext_nodes ) {
+        setTreeAndExtNodes( t, ext_nodes );
+    }
+
+    // (Last modified: 12/01/00)
+    private PhylogenyNode calculateLCA( final ArrayList<PhylogenyNode> nodes ) {
+        if ( ( nodes == null ) || nodes.isEmpty() ) {
+            return null;
+        }
+        PhylogenyNode node = nodes.get( 0 );
+        int c = node.getNumberOfExternalNodes();
+        final int v = nodes.size();
+        while ( !node.isRoot() && ( c < v ) ) {
+            node = node.getParent();
+            c = node.getNumberOfExternalNodes();
+        }
+        ArrayList<PhylogenyNode> current_nodes = new ArrayList<PhylogenyNode>( node.getAllExternalDescendants() );
+        while ( !node.isRoot() && !current_nodes.containsAll( nodes ) ) {
+            node = node.getParent();
+            current_nodes = new ArrayList<PhylogenyNode>( node.getAllExternalDescendants() );
+        }
+        return node;
+    }
+
+    // (Last modified: 11/31/00)
+    private void calculateMean() {
+        if ( ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null ) || tree_.isEmpty() ) {
+            return;
+        }
+        double sum = 0.0;
+        final ListIterator<PhylogenyNode> li = nodes_.listIterator();
+        n_ = 0;
+        try {
+            while ( li.hasNext() ) {
+                n_++;
+                sum += getDistanceToNode( li.next(), lca_ );
+            }
+        }
+        catch ( final Exception e ) {
+            System.err.println( "calculateMean(): " + "Exception: " + e );
+            System.exit( -1 );
+        }
+        setMean( sum / n_ );
+    }
+
+    // (Last modified: 11/30/00)
+    private void calculateMeanDistToRoot() {
+        if ( ( tree_ == null ) || tree_.isEmpty() ) {
+            return;
+        }
+        double sum = 0.0;
+        PhylogenyNode node = tree_.getFirstExternalNode();
+        n_ = 0;
+        while ( node != null ) {
+            n_++;
+            sum += getDistanceToRoot( node );
+            node = node.getNextExternalNode();
+        }
+        setMean( sum / n_ );
+    }
+
+    // (Last modified: 11/31/00)
+    private void calculateStandardDeviation() {
+        if ( ( getVariance() == DistanceCalculator.DEFAULT ) || ( getVariance() < 0.0 ) ) {
+            return;
+        }
+        setStandardDeviation( java.lang.Math.sqrt( getVariance() ) );
+    }
+
+    // (Last modified: 11/31/00)
+    private void calculateVariance() {
+        if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( nodes_ == null ) || nodes_.isEmpty() || ( tree_ == null )
+                || tree_.isEmpty() || ( n_ <= 1.0 ) ) {
+            return;
+        }
+        double x = 0.0, sum = 0.0;
+        final ListIterator<PhylogenyNode> li = nodes_.listIterator();
+        try {
+            while ( li.hasNext() ) {
+                x = getDistanceToNode( li.next(), lca_ ) - getMean();
+                sum += ( x * x );
+            }
+        }
+        catch ( final Exception e ) {
+            System.err.println( "calculateVariance(): " + "Exception: " + e );
+            System.exit( -1 );
+        }
+        setVariance( sum / ( n_ - 1 ) );
+    }
+
+    // (Last modified: 11/31/00)
+    private void calculateVarianceDistToRoot() {
+        if ( ( getMean() == DistanceCalculator.DEFAULT ) || ( tree_ == null ) || tree_.isEmpty() || ( n_ <= 1.0 ) ) {
+            return;
+        }
+        double x = 0.0, sum = 0.0;
+        PhylogenyNode node = tree_.getFirstExternalNode();
+        while ( node != null ) {
+            x = getDistanceToRoot( node ) - getMean();
+            sum += ( x * x );
+            node = node.getNextExternalNode();
+        }
+        setVariance( sum / ( n_ - 1 ) );
+    }
+
+    /**
+     * Calculates the distance of the PhylogenyNode with seq name seq_name to
+     * the LCA of ext_nodes, which has been set either with constructor
+     * DistanceCalculator(Phylogeny,Vector) or method
+     * setTreeAndExtNodes(Phylogeny,Vector). Throws an exception if no
+     * PhylogenyNode with seq name seq_name is found or if seq_name is not
+     * unique. (Last modified: 12/03/00)
+     * 
+     * @param seq_name
+     *            the seq name for the PhylogenyNode for which the distance to
+     *            the LCA is to be calculated
+     * @return distance of PhylogenyNode with seq name seq_name to the LCA of
+     *         Nodes in ext_nodes
+     * @see #DistanceCalculator(Phylogeny,Vector)
+     * @see #setTreeAndExtNodes(Phylogeny,Vector)
+     * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
+     */
+    public double getDistanceToLCA( final String seq_name ) {
+        if ( ( tree_ == null ) || tree_.isEmpty() || ( lca_ == null ) ) {
+            return 0.0;
+        }
+        return getDistanceToNode( seq_name, lca_ );
+    }
+
+    /**
+     * Calculates the distance of PhylogenyNode outer to PhylogenyNode inner.
+     * PhylogenyNode inner must be closer to the root than PhylogenyNode outer
+     * and on the same "path". (Last modified: 12/01/00)
+     * 
+     * @param outer
+     *            a PhylogenyNode
+     * @param inner
+     *            a PhylogenyNode closer to the root than outer
+     * @return distance of PhylogenyNode outer to PhylogenyNode inner
+     */
+    public double getDistanceToNode( PhylogenyNode outer, final PhylogenyNode inner ) {
+        double d = 0.0, dist = 0.0;
+        while ( ( inner != outer ) && !outer.isRoot() ) {
+            d = outer.getDistanceToParent();
+            if ( d > 0.0 ) {
+                dist += d;
+            }
+            outer = outer.getParent();
+        }
+        if ( !inner.isRoot() && outer.isRoot() ) {
+            throw new IllegalArgumentException( "getDistanceToNode(PhylogenyNode outer,PhylogenyNode inner): "
+                    + "PhylogenyNode inner is not closer to the root than PhylogenyNode outer "
+                    + "or is not on the same \"subtree\"" );
+        }
+        return dist;
+    }
+
+    /**
+     * Calculates the distance of the PhylogenyNode with seq name seq_name to
+     * PhylogenyNode inner. PhylogenyNode inner must be closer to the root than
+     * the PhylogenyNode with seq name seq_name and on the same "path". Throws
+     * an exception if no PhylogenyNode with seq name seq_name is found or if
+     * seq_name is not unique. (Last modified: 12/01/00)
+     * 
+     * @param seq_name
+     *            the seq name of a PhylogenyNode further from the root than
+     *            PhylogenyNode inner
+     * @param inner
+     *            a PhylogenyNode
+     * @return distance of PhylogenyNode with seq name seq_name to PhylogenyNode
+     *         inner
+     */
+    public double getDistanceToNode( final String seq_name, final PhylogenyNode inner ) {
+        if ( ( tree_ == null ) || tree_.isEmpty() ) {
+            return 0.0;
+        }
+        return getDistanceToNode( tree_.getNodeViaSequenceName( seq_name ), inner );
+    }
+
+    /**
+     * Calculates the distance of PhylogenyNode n to the root of Phylogeny t
+     * which has been set either with a constructor, setTree(Phylogeny), or
+     * setTreeAndExtNodes(Phylogeny,Vector). (Last modified: 12/01/00)
+     * 
+     * @param n
+     *            the PhylogenyNode for which the distance to the root is to be
+     *            calculated
+     * @return distance of PhylogenyNode n to the root
+     * @see #DistanceCalculator(Phylogeny)
+     * @see #DistanceCalculator(Phylogeny,Vector)
+     * @see #setTree(Phylogeny)
+     * @see #setTreeAndExtNodes(Phylogeny,Vector)
+     */
+    public double getDistanceToRoot( final PhylogenyNode n ) {
+        if ( ( tree_ == null ) || tree_.isEmpty() ) {
+            return 0.0;
+        }
+        double d = 0.0;
+        try {
+            d = getDistanceToNode( n, tree_.getRoot() );
+        }
+        catch ( final Exception e ) {
+            System.err.println( "getDistanceToRoot(PhylogenyNode): Unexpected " + "exception: " + e );
+            System.exit( -1 );
+        }
+        return d;
+    }
+
+    /**
+     * Calculates the distance of the PhylogenyNode with seq name seq_name to
+     * the root of Phylogeny t, which has been set either with a constructor,
+     * setTree(Phylogeny), or setTreeAndExtNodes(Phylogeny,Vector). Throws an
+     * exception if no PhylogenyNode with seq name seq_name is found or if
+     * seq_name is not unique. (Last modified: 12/01/00)
+     * 
+     * @param seq_name
+     *            the seq name for the PhylogenyNode for which the distance to
+     *            the root is to be calculated
+     * @return distance of PhylogenyNode with seq name seq_name to the root
+     * @see #DistanceCalculator(Phylogeny)
+     * @see #DistanceCalculator(Phylogeny,Vector)
+     * @see #setTree(Phylogeny)
+     * @see #setTreeAndExtNodes(Phylogeny,Vector)
+     * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
+     */
+    public double getDistanceToRoot( final String seq_name ) {
+        if ( ( tree_ == null ) || tree_.isEmpty() ) {
+            return 0.0;
+        }
+        return getDistanceToNode( seq_name, tree_.getRoot() );
+    }
+
+    /**
+     * Returns the mean distance. If constructor DistanceCalculator(Phylogeny)
+     * or method setTree(Phylogeny) have been used, it is the mean of the
+     * distances from the root to all external Nodes. If constructor
+     * DistanceCalculator(Phylogeny,Vector) or method
+     * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the mean of
+     * the distances from the external nodes ext_nodes to their lowest common
+     * ancestor. (Last modified: 11/30/00)
+     * 
+     * @return mean distance
+     * @see #DistanceCalculator(Phylogeny)
+     * @see #DistanceCalculator(Phylogeny,Vector)
+     * @see #setTree(Phylogeny)
+     * @see #setTreeAndExtNodes(Phylogeny,Vector)
+     * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
+     */
+    public double getMean() {
+        return mean_;
+    }
+
+    /**
+     * Returns the number of Nodes used to calculate the mean. (Last modified:
+     * 12/01/00)
+     * 
+     * @return n
+     */
+    public int getN() {
+        return n_;
+    }
+
+    /**
+     * Returns the standard deviation. If constructor
+     * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been
+     * used, it is the standard deviation of the distances from the root to all
+     * external Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or
+     * method setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the
+     * standard deviation of the distances from the external nodes ext_nodes to
+     * their lowest common ancestor. (Last modified: 11/30/00)
+     * 
+     * @return standard deviation
+     * @see #DistanceCalculator(Phylogeny)
+     * @see #DistanceCalculator(Phylogeny,Vector)
+     * @see #setTree(Phylogeny)
+     * @see #setTreeAndExtNodes(Phylogeny,Vector)
+     * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
+     */
+    public double getStandardDeviation() {
+        return stand_dev_;
+    }
+
+    /**
+     * Returns the variance. ( 1/(N - 1) * Sum((x-mean)^2) ) If constructor
+     * DistanceCalculator(Phylogeny) or method setTree(Phylogeny) have been
+     * used, it is the variance of the distances from the root to all external
+     * Nodes. If constructor DistanceCalculator(Phylogeny,Vector) or method
+     * setTreeAndExtNodes(Phylogeny,Vector) have been used, it is the variance
+     * of the distances from the external nodes ext_nodes to their lowest common
+     * ancestor. (Last modified: 11/30/00)
+     * 
+     * @return variance
+     * @see #DistanceCalculator(Phylogeny)
+     * @see #DistanceCalculator(Phylogeny,Vector)
+     * @see #setTree(Phylogeny)
+     * @see #setTreeAndExtNodes(Phylogeny,Vector)
+     * @see #setTreeAndExtNodes(Phylogeny,ArrayList)
+     */
+    public double getVariance() {
+        return variance_;
+    }
+
+    // (Last modified: 11/30/00)
+    private void setMean( final double d ) {
+        mean_ = d;
+    }
+
+    // (Last modified: 11/30/00)
+    private void setStandardDeviation( final double d ) {
+        stand_dev_ = d;
+    }
+
+    /**
+     * Sets the rooted Phylogeny t for which the mean distance to the root and
+     * its variance and standard deviation are calculated. (Last modified:
+     * 12/01/00)
+     * 
+     * @param t
+     *            the rooted Phylogeny for which the mean distance to the root
+     *            and its variance and standard deviation are calculated
+     */
+    public void setTree( final Phylogeny t ) {
+        tree_ = t;
+        nodes_ = null;
+        n_ = 0;
+        mean_ = DistanceCalculator.DEFAULT;
+        variance_ = DistanceCalculator.DEFAULT;
+        stand_dev_ = DistanceCalculator.DEFAULT;
+        lca_ = null;
+        calculateMeanDistToRoot();
+        calculateVarianceDistToRoot();
+        calculateStandardDeviation();
+    }
+
+    /**
+     * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which
+     * the mean distance to their lowest common ancestor and its variance and
+     * standard deviation are calculated. (Last modified: 12/03/00)
+     * 
+     * @param t
+     *            the rooted Phylogeny containing Nodes in Vector ext_nodes
+     * @param ext_nodes
+     *            an ArrayList of Nodes of t, the mean distance to their lowest
+     *            common ancestor and its variance and standard deviation are
+     *            calculated
+     */
+    public void setTreeAndExtNodes( final Phylogeny t, final ArrayList<PhylogenyNode> ext_nodes ) {
+        tree_ = t;
+        nodes_ = ext_nodes;
+        n_ = 0;
+        mean_ = DistanceCalculator.DEFAULT;
+        variance_ = DistanceCalculator.DEFAULT;
+        stand_dev_ = DistanceCalculator.DEFAULT;
+        lca_ = calculateLCA( nodes_ );
+        calculateMean();
+        calculateVariance();
+        calculateStandardDeviation();
+    }
+
+    /**
+     * Sets the rooted Phylogeny t and the external Nodes ext_nodes for which
+     * the mean distance to their lowest common ancestor and its variance and
+     * standard deviation are calculated. (Last modified: 12/03/00)
+     * 
+     * @param t
+     *            the rooted Phylogeny containing Nodes in Vector ext_nodes
+     * @param ext_nodes
+     *            a Vector of Nodes of t, the mean distance to their lowest
+     *            common ancestor and its variance and standard deviation are
+     *            calculated
+     */
+    public void setTreeAndExtNodes( final Phylogeny t, final Vector<PhylogenyNode> ext_nodes ) {
+        setTreeAndExtNodes( t, new ArrayList<PhylogenyNode>( ext_nodes ) );
+    }
+
+    // (Last modified: 11/30/00)
+    private void setVariance( final double d ) {
+        variance_ = d;
+    }
+
+    // Main for testing.
+    public static void main( final String args[] ) {
+        File tree_file = null;
+        Phylogeny tree = null;
+        DistanceCalculator dc = null;
+        tree_file = new File( args[ 0 ] );
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( tree_file, true );
+            tree = factory.create( tree_file, pp )[ 0 ];
+        }
+        catch ( final Exception e ) {
+            System.out.println( e.toString() );
+            System.exit( -1 );
+        }
+        double time = System.currentTimeMillis();
+        dc = new DistanceCalculator( tree );
+        final double m = dc.getMean(), var = dc.getVariance(), sd = dc.getStandardDeviation();
+        time = ( System.currentTimeMillis() - time );
+        System.out.println( "\nn   = " + dc.getN() );
+        System.out.println( "mea = " + m );
+        System.out.println( "var = " + var );
+        System.out.println( "sd  = " + sd + "\n" );
+        System.out.println( "t=" + time + "\n" );
+    }
+}
diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java
new file mode 100644 (file)
index 0000000..b6641bb
--- /dev/null
@@ -0,0 +1,389 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.util.HashMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+/*
+ * Implements our algorithm for speciation - duplication inference (SDI). <p>
+ * The initialization is accomplished by: </p> <ul> <li>method
+ * "linkExtNodesOfG()" of class SDI: setting the links for the external nodes of
+ * the gene tree <li>"preorderReID(int)" from class Phylogeny: numbering of
+ * nodes of the species tree in preorder <li>the optional stripping of the
+ * species tree is accomplished by method "stripTree(Phylogeny,Phylogeny)" of
+ * class Phylogeny </ul> <p> The recursion part is accomplished by this class'
+ * method "geneTreePostOrderTraversal(PhylogenyNode)". <p> Requires JDK 1.5 or
+ * greater.
+ * 
+ * @see SDI#linkNodesOfG()
+ * 
+ * @see Phylogeny#preOrderReId()
+ * 
+ * @see
+ * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
+ * 
+ * @see #geneTreePostOrderTraversal(PhylogenyNode)
+ * 
+ * @author Christian M. Zmasek
+ */
+public class GSDI extends SDI {
+
+    private final HashMap<PhylogenyNode, Integer> _transversal_counts;
+    private final boolean                         _most_parsimonious_duplication_model;
+    private int                                   _speciation_or_duplication_events_sum;
+    private int                                   _speciations_sum;
+
+    /**
+     * Constructor which sets the gene tree and the species tree to be compared.
+     * species_tree is the species tree to which the gene tree gene_tree is
+     * compared. The gene tree must be completely binary and rooted; the
+     * species tree must be rooted and may contain polytomies. The actual
+     * inference is performed by this constructor. The mapping cost L can then
+     * be calculated with method "computeMappingCostL()".
+     * <p>
+     * 
+     * @see SDI#computeMappingCostL()
+     * @param gene_tree
+     *            reference to a rooted gene tree to which duplication vs
+     *            speciation events are to be assigned; must have species names
+     *            in the species name fields for all external nodes
+     * @param species_tree
+     *            reference to a rooted species tree which might get stripped
+     *            in the process; must have species names in the species name
+     *            fields for all external nodes
+     * 
+     * @param most_parsimonious_duplication_model
+     *            set to true to assign nodes as speciations which would
+     *            otherwise be assigned as unknown because of polytomies in the
+     *            species tree.
+     * 
+     */
+    public GSDI( final Phylogeny gene_tree,
+                 final Phylogeny species_tree,
+                 final boolean most_parsimonious_duplication_model ) {
+        super( gene_tree, species_tree );
+        _speciation_or_duplication_events_sum = 0;
+        _speciations_sum = 0;
+        _most_parsimonious_duplication_model = most_parsimonious_duplication_model;
+        _transversal_counts = new HashMap<PhylogenyNode, Integer>();
+        _duplications_sum = 0;
+        getSpeciesTree().preOrderReId();
+        linkNodesOfG();
+        geneTreePostOrderTraversal( getGeneTree().getRoot() );
+    }
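+
+    // Illustrative usage sketch (not part of the original source): the
+    // inference runs in the constructor itself, so the event counts can be
+    // read back directly afterwards, e.g.:
+    //
+    //   final GSDI gsdi = new GSDI( gene_tree, species_tree, true );
+    //   System.out.println( "duplications: " + gsdi.getDuplicationsSum() );
+    //   System.out.println( "speciations : " + gsdi.getSpeciationsSum() );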
+
+    private Event createDuplicationEvent() {
+        final Event event = Event.createSingleDuplicationEvent();
+        ++_duplications_sum;
+        return event;
+    }
+
+    private Event createSingleSpeciationOrDuplicationEvent() {
+        final Event event = Event.createSingleSpeciationOrDuplicationEvent();
+        ++_speciation_or_duplication_events_sum;
+        return event;
+    }
+
+    private Event createSpeciationEvent() {
+        final Event event = Event.createSingleSpeciationEvent();
+        ++_speciations_sum;
+        return event;
+    }
+
+    // s is the node on the species tree g maps to.
+    private void determineEvent( final PhylogenyNode s, final PhylogenyNode g ) {
+        Event event = null;
+        // Determine how many children map to same node as parent.
+        int sum_g_childs_mapping_to_s = 0;
+        for( final PhylogenyNodeIterator iter = g.iterateChildNodesForward(); iter.hasNext(); ) {
+            if ( iter.next().getLink() == s ) {
+                ++sum_g_childs_mapping_to_s;
+            }
+        }
+        // Determine the sum of traversals.
+        int traversals_sum = 0;
+        int max_traversals = 0;
+        PhylogenyNode max_traversals_node = null;
+        if ( !s.isExternal() ) {
+            for( final PhylogenyNodeIterator iter = s.iterateChildNodesForward(); iter.hasNext(); ) {
+                final PhylogenyNode current_node = iter.next();
+                final int traversals = getTraversalCount( current_node );
+                traversals_sum += traversals;
+                if ( traversals > max_traversals ) {
+                    max_traversals = traversals;
+                    max_traversals_node = current_node;
+                }
+            }
+        }
+        // System.out.println( " sum=" + traversals_sum );
+        // System.out.println( " max=" + max_traversals );
+        // System.out.println( " m=" + sum_g_childs_mapping_to_s );
+        if ( sum_g_childs_mapping_to_s > 0 ) {
+            if ( traversals_sum == 2 ) {
+                event = createDuplicationEvent();
+            }
+            else if ( traversals_sum > 2 ) {
+                if ( max_traversals <= 1 ) {
+                    if ( _most_parsimonious_duplication_model ) {
+                        event = createSpeciationEvent();
+                    }
+                    else {
+                        event = createSingleSpeciationOrDuplicationEvent();
+                    }
+                }
+                else {
+                    event = createDuplicationEvent();
+                    _transversal_counts.put( max_traversals_node, 1 );
+                }
+            }
+            else {
+                event = createDuplicationEvent();
+            }
+        }
+        else {
+            event = createSpeciationEvent();
+        }
+        g.getNodeData().setEvent( event );
+    }
+
+    /**
+     * Traverses the subtree of PhylogenyNode g in postorder, calculating the
+     * mapping function M, and determines which nodes represent speciation
+     * events and which ones duplication events.
+     * <p>
+     * Preconditions: Mapping M for external nodes must have been calculated and
+     * the species tree must be labeled in preorder.
+     * <p>
+     * (Last modified: )
+     * 
+     * @param g
+     *            starting node of a gene tree - normally the root
+     */
+    void geneTreePostOrderTraversal( final PhylogenyNode g ) {
+        if ( !g.isExternal() ) {
+            for( final PhylogenyNodeIterator iter = g.iterateChildNodesForward(); iter.hasNext(); ) {
+                geneTreePostOrderTraversal( iter.next() );
+            }
+            final PhylogenyNode[] linked_nodes = new PhylogenyNode[ g.getNumberOfDescendants() ];
+            for( int i = 0; i < linked_nodes.length; ++i ) {
+                linked_nodes[ i ] = g.getChildNode( i ).getLink();
+            }
+            final int[] min_max = obtainMinMaxIdIndices( linked_nodes );
+            int min_i = min_max[ 0 ];
+            int max_i = min_max[ 1 ];
+            // initTransversalCounts();
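+            // Walk the currently highest-id link up towards the root until all
+            // children's links coincide (their LCA in the preorder-numbered
+            // species tree), incrementing the traversal count of each node
+            // that is stepped over.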
+            while ( linked_nodes[ min_i ] != linked_nodes[ max_i ] ) {
+                increaseTraversalCount( linked_nodes[ max_i ] );
+                linked_nodes[ max_i ] = linked_nodes[ max_i ].getParent();
+                final int[] min_max_ = obtainMinMaxIdIndices( linked_nodes );
+                min_i = min_max_[ 0 ];
+                max_i = min_max_[ 1 ];
+            }
+            final PhylogenyNode s = linked_nodes[ max_i ];
+            g.setLink( s );
+            // Determines whether dup. or spec.
+            determineEvent( s, g );
+            // _transversal_counts.clear();
+        }
+    }
+
+    public int getSpeciationOrDuplicationEventsSum() {
+        return _speciation_or_duplication_events_sum;
+    }
+
+    public int getSpeciationsSum() {
+        return _speciations_sum;
+    }
+
+    private int getTraversalCount( final PhylogenyNode node ) {
+        if ( _transversal_counts.containsKey( node ) ) {
+            return _transversal_counts.get( node );
+        }
+        return 0;
+    }
+
+    private void increaseTraversalCount( final PhylogenyNode node ) {
+        if ( _transversal_counts.containsKey( node ) ) {
+            _transversal_counts.put( node, _transversal_counts.get( node ) + 1 );
+        }
+        else {
+            _transversal_counts.put( node, 1 );
+        }
+        // System.out.println( "count for node " + node.getID() + " is now "
+        // + getTraversalCount( node ) );
+    }
+
+    /**
+     * This allows for linking of internal nodes of the species tree (as opposed
+     * to just external nodes, as in the method it overrides).
+     * 
+     */
+    @Override
+    void linkNodesOfG() {
+        final HashMap<Taxonomy, PhylogenyNode> speciestree_ext_nodes = new HashMap<Taxonomy, PhylogenyNode>();
+        for( final PhylogenyNodeIterator iter = _species_tree.iteratorLevelOrder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.getNodeData().isHasTaxonomy() ) {
+                if ( speciestree_ext_nodes.containsKey( n.getNodeData().getTaxonomy() ) ) {
+                    throw new IllegalArgumentException( "taxonomy [" + n.getNodeData().getTaxonomy()
+                            + "] is not unique in species phylogeny" );
+                }
+                speciestree_ext_nodes.put( n.getNodeData().getTaxonomy(), n );
+            }
+        }
+        // Retrieve the reference to the PhylogenyNode with a matching species
+        // name.
+        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode g = iter.next();
+            if ( !g.getNodeData().isHasTaxonomy() ) {
+                throw new IllegalArgumentException( "gene tree node " + g + " has no taxonomic data" );
+            }
+            final PhylogenyNode s = speciestree_ext_nodes.get( g.getNodeData().getTaxonomy() );
+            if ( s == null ) {
+                throw new IllegalArgumentException( "species " + g.getNodeData().getTaxonomy()
+                        + " not present in species tree." );
+            }
+            g.setLink( s );
+        }
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "Most parsimonious duplication model: " + _most_parsimonious_duplication_model );
+        sb.append( ForesterUtil.getLineSeparator() );
+        sb.append( "Speciations sum                    : " + getSpeciationsSum() );
+        sb.append( ForesterUtil.getLineSeparator() );
+        sb.append( "Duplications sum                   : " + getDuplicationsSum() );
+        sb.append( ForesterUtil.getLineSeparator() );
+        if ( !_most_parsimonious_duplication_model ) {
+            sb.append( "Speciation or duplications sum     : " + getSpeciationOrDuplicationEventsSum() );
+            sb.append( ForesterUtil.getLineSeparator() );
+        }
+        sb.append( "mapping cost L                     : " + computeMappingCostL() );
+        return sb.toString();
+    }
+
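+    // Returns the indices of the nodes with the smallest and the largest id
+    // in linked_nodes, as { index_of_min, index_of_max }.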
+    static int[] obtainMinMaxIdIndices( final PhylogenyNode[] linked_nodes ) {
+        int max_i = 0;
+        int min_i = 0;
+        int max_i_id = -Integer.MAX_VALUE;
+        int min_i_id = Integer.MAX_VALUE;
+        for( int i = 0; i < linked_nodes.length; ++i ) {
+            final int id_i = linked_nodes[ i ].getId();
+            if ( id_i > max_i_id ) {
+                max_i = i;
+                max_i_id = linked_nodes[ max_i ].getId();
+            }
+            if ( id_i < min_i_id ) {
+                min_i = i;
+                min_i_id = linked_nodes[ min_i ].getId();
+            }
+        }
+        return new int[] { min_i, max_i };
+    }
+    /**
+     * Updates the mapping function M after the root of the gene tree has been
+     * moved by one branch. It calculates M for the root of the gene tree and
+     * one of its two children.
+     * <p>
+     * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
+     * <p>
+     * (Last modified: )
+     * 
+     * @param prev_root_was_dup
+     *            true if the previous root was a duplication, false otherwise
+     * @param prev_root_c1
+     *            child 1 of the previous root
+     * @param prev_root_c2
+     *            child 2 of the previous root
+     * @return number of duplications which have been assigned in gene tree
+     */
+    // int updateM( final boolean prev_root_was_dup,
+    // final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) {
+    // final PhylogenyNode root = getGeneTree().getRoot();
+    // if ( ( root.getChildNode1() == prev_root_c1 )
+    // || ( root.getChildNode2() == prev_root_c1 ) ) {
+    // calculateMforNode( prev_root_c1 );
+    // }
+    // else {
+    // calculateMforNode( prev_root_c2 );
+    // }
+    // Event event = null;
+    // if ( prev_root_was_dup ) {
+    // event = Event.createSingleDuplicationEvent();
+    // }
+    // else {
+    // event = Event.createSingleSpeciationEvent();
+    // }
+    // root.getPhylogenyNodeData().setEvent( event );
+    // calculateMforNode( root );
+    // return getDuplications();
+    // } // updateM( boolean, PhylogenyNode, PhylogenyNode )
+    // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
+    // Calculates M for PhylogenyNode n, given that M for the two children
+    // of n has been calculated.
+    // (Last modified: 10/02/01)
+    // private void calculateMforNode( final PhylogenyNode n ) {
+    // if ( !n.isExternal() ) {
+    // boolean was_duplication = n.isDuplication();
+    // PhylogenyNode a = n.getChildNode1().getLink(), b = n
+    // .getChildNode2().getLink();
+    // while ( a != b ) {
+    // if ( a.getID() > b.getID() ) {
+    // a = a.getParent();
+    // }
+    // else {
+    // b = b.getParent();
+    // }
+    // }
+    // n.setLink( a );
+    // Event event = null;
+    // if ( ( a == n.getChildNode1().getLink() )
+    // || ( a == n.getChildNode2().getLink() ) ) {
+    // event = Event.createSingleDuplicationEvent();
+    // if ( !was_duplication ) {
+    // increaseDuplications();
+    // }
+    // }
+    // else {
+    // event = Event.createSingleSpeciationEvent();
+    // if ( was_duplication ) {
+    // decreaseDuplications();
+    // }
+    // }
+    // n.getPhylogenyNodeData().setEvent( event );
+    // }
+    // } // calculateMforNode( PhylogenyNode )
+} // End of class GSDI.
diff --git a/forester/java/src/org/forester/sdi/ORcount.java b/forester/java/src/org/forester/sdi/ORcount.java
new file mode 100644 (file)
index 0000000..4271890
--- /dev/null
@@ -0,0 +1,382 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+/*
+ * Allows to count, in sets of gene trees: <ul> <li>shared ancestral clades
+ * <li>"super orthologous" relations <li>1:1 orthologous relationships </ul>
+ * 
+ * @see SDIse
+ * 
+ * @see SDI
+ * 
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.400 -- last modified: 10/29/2005
+ */
+public class ORcount {
+
+    private static final String[]                     group_1              = { "ANOGA", "DROME", "CAEBR", "CAEEL" };
+    private static final String[]                     group_2              = { "CIOIN", "FUGRU", "MOUSE", "RAT",
+            "HUMAN"                                                       };
+    private static final String[]                     all_species          = { "ANOGA", "DROME", "CAEBR", "CAEEL",
+            "CIOIN", "FUGRU", "MOUSE", "RAT", "HUMAN"                     };
+    private final Phylogeny[]                         _trees;
+    private HashMap<String, HashMap<Object, Integer>> _species             = null;
+    private ArrayList<String>                         _names               = null;
+    private int                                       _group1_vs_2_counter = 0;
+
+    /**
+     * Constructor which sets the gene trees (Phylogeny array) to be analyzed.
+     */
+    public ORcount( final Phylogeny[] trees ) {
+        _trees = trees;
+    } // ORcount( final Phylogeny[] trees )
+
+    private void count( final PhylogenyNode node ) {
+        final List<PhylogenyNode> external_nodes = node.getAllExternalDescendants();
+        for( int i = 1; i < external_nodes.size(); ++i ) {
+            for( int j = 0; j < i; ++j ) {
+                final PhylogenyNode node_i = external_nodes.get( i );
+                final PhylogenyNode node_j = external_nodes.get( j );
+                final String si = PhylogenyMethods.getSpecies( node_i );
+                final String sj = PhylogenyMethods.getSpecies( node_j );
+                count( si, sj, node_i.getName(), node_j.getName() );
+            }
+        }
+    } // count( PhylogenyNode )
+
+    private void count( final String a, final String b, final String seq_name_a, final String seq_name_b ) {
+        HashMap<Object, Integer> h1 = _species.get( a );
+        if ( h1 == null ) {
+            throw new RuntimeException( "Unexpected error: Species \"" + a + "\" not present in species matrix." );
+        }
+        Object h2 = h1.get( b );
+        String species_in_h1 = b;
+        // We only look at the half matrix, and we do not know/care about the
+        // order of the keys (species).
+        if ( h2 == null ) {
+            h1 = _species.get( b );
+            if ( h1 == null ) {
+                throw new RuntimeException( "Unexpected error: Species \"" + b + "\" not present in species matrix." );
+            }
+            h2 = h1.get( a );
+            species_in_h1 = a;
+        }
+        if ( h2 == null ) {
+            throw new RuntimeException( "Unexpected error: Species \"" + a + "\" not present in species matrix." );
+        }
+        h1.put( species_in_h1, new Integer( ( ( Integer ) h2 ).intValue() + 1 ) );
+        _names.add( a + "-" + seq_name_a + " = " + b + "-" + seq_name_b );
+    } // count( String, String )
+
+    public void countSharedAncestralClades( final Phylogeny tree,
+                                            final int bootstrap_threshold,
+                                            final String[] group_1,
+                                            final String[] group_2 ) {
+        if ( ( group_1 == null ) || ( group_2 == null ) ) {
+            throw new IllegalArgumentException( "String[](s) in arguments to method \"ORcount.countSharedAncestralClades\" is (are) null." );
+        }
+        if ( !tree.isRooted() ) {
+            throw new IllegalArgumentException( "Phylogeny must be rooted in order to count shared ancestral clades." );
+        }
+        final PhylogenyNodeIterator it = tree.iteratorPostorder();
+        tree.setIndicatorsToZero();
+        while ( it.hasNext() ) {
+            final PhylogenyNode current_node = it.next();
+            if ( current_node.getNumberOfDescendants() != 2 ) {
+                throw new IllegalArgumentException( "Phylogeny can not contain multifurcations in order to count shared ancestral clades." );
+            }
+            if ( !current_node.isExternal() ) {
+                final PhylogenyNode child1 = current_node.getChildNode1();
+                final PhylogenyNode child2 = current_node.getChildNode2();
+                if ( ( child1.getIndicator() == 1 ) || ( child2.getIndicator() == 1 ) ) {
+                    current_node.setIndicator( ( byte ) 1 );
+                }
+                else {
+                    final List<PhylogenyNode> external_nodes = current_node.getAllExternalDescendants();
+                    final String[] external_species = new String[ external_nodes.size() ];
+                    for( int i = 0; i < external_nodes.size(); ++i ) {
+                        final PhylogenyNode n = external_nodes.get( i );
+                        external_species[ i ] = PhylogenyMethods.getSpecies( n ).trim().toUpperCase();
+                    }
+                    if ( ForesterUtil.isIntersecting( external_species, group_1 )
+                            && ForesterUtil.isIntersecting( external_species, group_2 ) ) {
+                        current_node.setIndicator( ( byte ) 1 );
+                        if ( ( group_1.length == 1 ) && ( group_2.length == 1 ) ) {
+                            count( group_1[ 0 ], group_2[ 0 ], "name a", "name b" );
+                        }
+                        else {
+                            increaseGroup1Vs2Counter();
+                        }
+                    }
+                }
+            }
+        } // while
+    } // countSharedAncestralClades( Phylogeny, int )
+
+    public void countSharedAncestralClades( final Phylogeny[] trees, final int bootstrap_threshold ) {
+        for( int i = 1; i < ORcount.all_species.length; ++i ) {
+            for( int j = 0; j < i; ++j ) {
+                final String all_i = ORcount.all_species[ i ].trim().toUpperCase();
+                final String all_j = ORcount.all_species[ j ].trim().toUpperCase();
+                final String[] a = { all_i };
+                final String[] b = { all_j };
+                for( int k = 0; k < trees.length; ++k ) {
+                    countSharedAncestralClades( trees[ k ], bootstrap_threshold, a, b );
+                }
+            }
+        }
+        // print();
+        if ( ( ORcount.group_1 != null ) && ( ORcount.group_2 != null ) && ( ORcount.group_1.length > 0 )
+                && ( ORcount.group_2.length > 0 ) ) {
+            setGroup1Vs2Counter( 0 );
+            for( int k = 0; k < trees.length; ++k ) {
+                countSharedAncestralClades( trees[ k ], bootstrap_threshold, ORcount.group_1, ORcount.group_2 );
+            }
+            System.out.println( "\nCount [(" + ForesterUtil.stringArrayToString( ORcount.group_1 ) + ") vs ("
+                    + ForesterUtil.stringArrayToString( ORcount.group_2 ) + ")] = " + getGroup1Vs2Counter() );
+        }
+    }
+
+    public void countSuperOrthologousRelations( final int bootstrap_threshold ) {
+        reset();
+        for( int i = 0; i < _trees.length; ++i ) {
+            countSuperOrthologousRelations( _trees[ i ], bootstrap_threshold );
+        }
+    }
+
+    private void countSuperOrthologousRelations( final Phylogeny tree, final int bootstrap_threshold ) {
+        final PhylogenyNodeIterator it = tree.iteratorPostorder();
+        if ( !tree.isRooted() ) {
+            throw new IllegalArgumentException( "Phylogeny must be rooted in order to count 1:1 orthologous relationships." );
+        }
+        // The purpose of this is to find all subtrees
+        // which contain only speciation events on all their nodes.
+        // All nodes in these subtrees are "painted" with 0's, whereas
+        // the rest of the nodes are painted with 1's.
+        tree.setIndicatorsToZero();
+        it.reset();
+        while ( it.hasNext() ) {
+            final PhylogenyNode current_node = it.next();
+            if ( current_node.getNumberOfDescendants() != 2 ) {
+                throw new IllegalArgumentException( "Phylogeny can not contain multifurcations in order to count 1:1 orthologous relationships." );
+            }
+            if ( !current_node.isExternal() && !current_node.isHasAssignedEvent() ) {
+                throw new IllegalArgumentException( "All nodes must have duplication or speciation assigned in order to count 1:1 orthologous relationships." );
+            }
+            if ( !current_node.isExternal()
+                    && ( current_node.isDuplication() || ( current_node.getChildNode1().getIndicator() == 1 ) || ( current_node
+                            .getChildNode2().getIndicator() == 1 ) ) ) {
+                current_node.setIndicator( ( byte ) 1 );
+            }
+        }
+        // This finds the largest subtrees containing only speciations and
+        // uses their topmost nodes to count all possible species combinations
+        // among their external nodes.
+        // (This could possibly be combined with the first iteration.)
+        it.reset();
+        while ( it.hasNext() ) {
+            final PhylogenyNode current_node = it.next();
+            if ( !current_node.isExternal()
+                    && ( current_node.getIndicator() == 0 )
+                    && ( current_node.isRoot() || ( current_node.getParent().getIndicator() == 1 ) )
+                    && ( ( bootstrap_threshold < 1 ) || ( ( PhylogenyMethods.getConfidenceValue( current_node ) >= bootstrap_threshold )
+                            && ( PhylogenyMethods.getConfidenceValue( current_node.getChildNode1() ) >= bootstrap_threshold ) && ( PhylogenyMethods
+                            .getConfidenceValue( current_node.getChildNode2() ) >= bootstrap_threshold ) ) ) ) {
+                count( current_node );
+            }
+        }
+    } // countSuperOrthologousRelations( Phylogeny, int )
+
+    // This puts all the species found in Phylogeny array _trees into
+    // species HashMap.
+    private void getAllSpecies() {
+        if ( ( getTrees() == null ) || ( getTrees().length < 1 ) ) {
+            throw new RuntimeException( "Phylogeny array in method \"getAllSpecies( HashMap hash )\" is null or empty." );
+        }
+        setSpecies( new HashMap<String, HashMap<Object, Integer>>() );
+        for( int i = 0; i < getTrees().length; ++i ) {
+            PhylogenyNode node = getTrees()[ i ].getFirstExternalNode();
+            while ( node != null ) {
+                getSpecies().put( PhylogenyMethods.getSpecies( node ), null );
+                node = node.getNextExternalNode();
+            }
+        }
+    } // getAllSpecies()
+
+    private int getGroup1Vs2Counter() {
+        return _group1_vs_2_counter;
+    }
+
+    private HashMap<String, HashMap<Object, Integer>> getSpecies() {
+        return _species;
+    }
+
+    private Phylogeny[] getTrees() {
+        return _trees;
+    }
+
+    private void increaseGroup1Vs2Counter() {
+        _group1_vs_2_counter++;
+    }
+
+    private void printCount() {
+        if ( ( _species == null ) || ( _species.size() < 2 ) ) {
+            throw new RuntimeException( "Species HashMap in method \"setUpCountingMatrix()\" is null or contains less than two species." );
+        }
+        final Object[] species_array = _species.keySet().toArray();
+        final int s = species_array.length;
+        for( int i = 0; i < s - 1; ++i ) {
+            final String species = ( String ) species_array[ i ];
+            System.out.println();
+            System.out.println( species + ":" );
+            final HashMap<?, ?> h = _species.get( species );
+            // Only half the matrix is used; diagonals are ignored.
+            for( int j = 1 + i; j < s; ++j ) {
+                final String sp = ( String ) species_array[ j ];
+                final int c = ( ( Integer ) h.get( sp ) ).intValue();
+                System.out.println( species + "-" + sp + ": " + c );
+            }
+        }
+    }
+
+    private void printNames() {
+        for( int i = 0; i < _names.size(); ++i ) {
+            System.out.println( i + ": " + _names.get( i ) );
+        }
+    }
+
+    public void reset() {
+        getAllSpecies();
+        setUpCountingMatrix();
+        setGroup1Vs2Counter( 0 );
+        _names = new ArrayList<String>();
+    }
+
+    private void setGroup1Vs2Counter( final int group1_vs_2_counter ) {
+        _group1_vs_2_counter = group1_vs_2_counter;
+    }
+
+    private void setSpecies( final HashMap<String, HashMap<Object, Integer>> species ) {
+        _species = species;
+    }
+
+    private void setUpCountingMatrix() {
+        if ( ( getSpecies() == null ) || ( getSpecies().size() < 2 ) ) {
+            throw new RuntimeException( "Species HashMap in method \"setUpCountingMatrix()\" is null or contains less than two species." );
+        }
+        final Object[] species_array = getSpecies().keySet().toArray();
+        final int s = species_array.length;
+        for( int i = 0; i < s; ++i ) {
+            final String species = ( String ) species_array[ i ];
+            final HashMap<Object, Integer> h = new HashMap<Object, Integer>();
+            // Setting up HashMaps linked to by hash (=_species)
+            // Diagonals are ignored, only half the matrix is needed.
+            for( int j = 1 + i; j < s; ++j ) {
+                h.put( species_array[ j ], new Integer( 0 ) );
+            }
+            getSpecies().put( species, h );
+        }
+    }
+
+    private static void errorInCommandLine() {
+        System.out.println( "\nORcount: Error in command line.\n" );
+        System.out.println( "Usage: \"\"" );
+        System.out.println( "\nOptions:" );
+        System.out.println( " -" );
+        System.out.println( "" );
+        System.exit( -1 );
+    } // errorInCommandLine()
+
+    /**
+     * Main method for this class.
+     * <p>
+     * (Last modified: 11/26/03)
+     * 
+     * @param args
+     *            gene tree file name(s) (in NHX format with species names in
+     *            species name fields and sequence names in sequence name
+     *            fields)
+     */
+    public static void main( final String args[] ) {
+        if ( args.length == 0 ) {
+            ORcount.errorInCommandLine();
+        }
+        final Phylogeny[] trees = new Phylogeny[ args.length ];
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        for( int i = 0; i < trees.length; ++i ) {
+            try {
+                System.out.println( "Reading tree #" + i + "  [" + args[ i ] + "]" );
+                final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( new File( args[ i ] ), true );
+                trees[ i ] = factory.create( new File( args[ i ] ), pp )[ 0 ];
+            }
+            catch ( final Exception e ) {
+                System.out.println( "\nFailed to read \"" + args[ i ] + "\". Terminating.\n" );
+                System.exit( -1 );
+            }
+        }
+        System.out.println( "Finished reading in trees.\n\n" );
+        final ORcount or_count = new ORcount( trees );
+        try {
+            System.out.println( "\n\n\n\"1:1 ORTHOLOGOUS GENE PAIRS\":\n" );
+            System.out.println( "\n\n\n\"SUPER ORTHOLOGOUS GENE PAIRS\":\n" );
+            or_count.countSuperOrthologousRelations( 0 );
+            or_count.printNames();
+            or_count.printCount();
+            // System.out.println( "\n\n\n\"SHARED ANCESTRAL CLADES\":\n");
+            // or_count.reset();
+            // or_count.countSharedAncestralClades( trees, 0 );
+        }
+        catch ( final Exception e ) {
+            System.out.println( "\nException. Terminating.\n" );
+            System.out.println( "\nException is: " + e + "\n" );
+            e.printStackTrace();
+            System.exit( -1 );
+        }
+        System.out.println( "\nDone." );
+        System.exit( 0 );
+    } // main ( String )
+} // End of class ORcount.
diff --git a/forester/java/src/org/forester/sdi/RIO.java b/forester/java/src/org/forester/sdi/RIO.java
new file mode 100644 (file)
index 0000000..d2c79ee
--- /dev/null
@@ -0,0 +1,1126 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.io.parsers.SymmetricalDistanceMatrixParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+/*
+ * @author Christian M. Zmasek
+ */
+public final class RIO {
+
+    private final static boolean                      ROOT_BY_MINIMIZING_MAPPING_COST = false;
+    private final static boolean                      ROOT_BY_MINIMIZING_SUM_OF_DUPS  = true;
+    private final static boolean                      ROOT_BY_MINIMIZING_TREE_HEIGHT  = true;
+    private final static boolean                      TIME                            = false;
+    private HashMap<String, HashMap<String, Integer>> _o_hash_maps;
+    private HashMap<String, HashMap<String, Integer>> _so_hash_maps;
+    private HashMap<String, HashMap<String, Integer>> _up_hash_maps;
+    private HashMap<String, HashMap<String, Integer>> _sn_hash_maps;                          // HashMap of HashMaps
+    private DistanceMatrix                            _m;
+    private HashMap<String, Double>                   _l;
+    private String[]                                  _seq_names;
+    private int                                       _bootstraps;
+    private int                                       _ext_nodes_;
+    private long                                      _time;
+
+    /**
+     * Default constructor.
+     */
+    public RIO() {
+        reset();
+    }
+
+    /**
+     * Returns the number of trees analyzed.
+     * 
+     * @return the number of trees analyzed
+     */
+    public final int getBootstraps() {
+        return _bootstraps;
+    }
+
+    // Helper method for inferredOrthologsToString,
+    // inferredOrthologsToArrayList,
+    // and inferredUltraParalogsToString.
+    private final double getBootstrapValueFromHash( final HashMap<String, Integer> h, final String name ) {
+        if ( !h.containsKey( name ) ) {
+            return 0.0;
+        }
+        final int i = h.get( name );
+        return ( i * 100.0 / getBootstraps() );
+    }
+
+    /**
+     * Returns the distance to a sequence/taxon after a distance list file has
+     * been read in with readDistanceList(File). Throws an exception if name is
+     * not found or if no list has been read in.
+     * 
+     * @param name
+     *            a sequence name
+     */
+    public final double getDistance( String name ) {
+        double distance = 0.0;
+        name = name.trim();
+        if ( _l == null ) {
+            throw new RuntimeException( "Distance list has probably not been read in (successfully)." );
+        }
+        if ( _l.get( name ) == null ) {
+            throw new IllegalArgumentException( name + " not found." );
+        }
+        distance = ( _l.get( name ) ).doubleValue();
+        return distance;
+    }
+
+    public final double getDistance( final String name1, final String name2 ) {
+        try {
+            return _m.getValue( _m.getIndex( name1 ), _m.getIndex( name2 ) );
+        }
+        catch ( final Exception e ) {
+            return 1;
+        }
+    }
+
+    /**
+     * Returns the number of external nodes in the gene trees analyzed (after
+     * stripping).
+     * 
+     * @return number of ext nodes in gene trees analyzed (after stripping)
+     */
+    public final int getExtNodesOfAnalyzedGeneTrees() {
+        return _ext_nodes_;
+    }
+
+    /**
+     * Returns a HashMap containing the inferred orthologs of the external gene
+     * tree node with the sequence name seq_name. Sequence names are the keys
+     * (String), numbers of observations are the values (Int). Orthologs are to
+     * be inferred by method "inferOrthologs". Throws an exception if seq_name
+     * is not found.
+     * 
+     * @param seq_name
+     *            sequence name of an external node of the gene trees
+     * @return HashMap containing the inferred orthologs
+     *         (name(String)->value(Int))
+     */
+    public final HashMap<String, Integer> getInferredOrthologs( final String seq_name ) {
+        if ( _o_hash_maps == null ) {
+            return null;
+        }
+        return _o_hash_maps.get( seq_name );
+    }
+
+    private final HashMap<String, Integer> getInferredSubtreeNeighbors( final String seq_name ) {
+        if ( _sn_hash_maps == null ) {
+            return null;
+        }
+        return _sn_hash_maps.get( seq_name );
+    }
+
+    /**
+     * Returns a HashMap containing the inferred "super orthologs" of the
+     * external gene tree node with the sequence name seq_name. Sequence names
+     * are the keys (String), numbers of observations are the values (Int).
+     * Super orthologs are to be inferred by method "inferOrthologs". Throws an
+     * exception if seq_name is not found.
+     * 
+     * @param seq_name
+     *            sequence name of an external node of the gene trees
+     * @return HashMap containing the inferred super orthologs
+     *         (name(String)->value(Int))
+     */
+    public final HashMap<String, Integer> getInferredSuperOrthologs( final String seq_name ) {
+        if ( _so_hash_maps == null ) {
+            return null;
+        }
+        return _so_hash_maps.get( seq_name );
+    }
+
+    /**
+     * Returns a HashMap containing the inferred "ultra paralogs" of the
+     * external gene tree node with the sequence name seq_name. Sequence names
+     * are the keys (String), numbers of observations are the values (Int).
+     * "ultra paralogs" are to be inferred by method "inferOrthologs". Throws an
+     * exception if seq_name is not found. 
+     * 
+     * @param seq_name
+     *            sequence name of an external node of the gene trees
+     * @return HashMap containing the inferred ultra paralogs
+     *         (name(String)->value(Int))
+     */
+    public final HashMap<String, Integer> getInferredUltraParalogs( final String seq_name ) {
+        if ( _up_hash_maps == null ) {
+            return null;
+        }
+        return _up_hash_maps.get( seq_name );
+    }
+
+    /**
+     * Returns the time (in ms) needed to run "inferOrthologs". Final variable
+     * TIME needs to be set to true.
+     * 
+     * @return time (in ms) needed to run method "inferOrthologs"
+     */
+    public long getTime() {
+        return _time;
+    }
+
+    /**
+     * Infers the orthologs (as well as the "super orthologs", the "subtree
+     * neighbors", and the "ultra paralogs") for each external node of the gene
+     * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
+     * for example). Tallies how many times each sequence is (super-)
+     * orthologous towards the query. Tallies how many times each sequence is
+     * ultra paralogous towards the query. Tallies how many times each sequence
+     * is a subtree neighbor of the query. Gene duplications are inferred using
+     * SDI. Modifies its argument species_tree. Is a little faster than
+     * "inferOrthologs(File,Phylogeny)" since orthologs are only inferred for
+     * query.
+     * <p>
+     * To obtain the results use the methods listed below.
+     * 
+     * @param gene_trees_file
+     *            a File containing gene Trees (read as phyloXML by this
+     *            method), e.g. the result of a bootstrap analysis
+     * @param species_tree
+     *            a species Phylogeny, which has species names in its species
+     *            fields
+     * @param query
+     *            the sequence name of the sequence whose orthologs are to be
+     *            inferred
+     */
+    public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
+            throws IOException {
+        int bs = 0;
+        if ( RIO.TIME ) {
+            _time = System.currentTimeMillis();
+        }
+        if ( !gene_trees_file.exists() ) {
+            throw new IllegalArgumentException( gene_trees_file.getAbsolutePath() + " does not exist." );
+        }
+        else if ( !gene_trees_file.isFile() ) {
+            throw new IllegalArgumentException( gene_trees_file.getAbsolutePath() + " is not a file." );
+        }
+        // Read in first tree to get its sequence names
+        // and strip species_tree.
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final Phylogeny gene_tree = factory.create( gene_trees_file, new PhyloXmlParser() )[ 0 ];
+        // Removes from species_tree all species not found in gene_tree.
+        PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
+        // Removes from gene_tree all species not found in species_tree.
+        PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
+        _seq_names = getAllExternalSequenceNames( gene_tree );
+        if ( ( _seq_names == null ) || ( _seq_names.length < 1 ) ) {
+            return;
+        }
+        _o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
+        _so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
+        _up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
+        _sn_hash_maps = new HashMap<String, HashMap<String, Integer>>();
+        _o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
+        _so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
+        _up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
+        _sn_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
+        // Go through all gene trees in the file.
+        final Phylogeny[] gene_trees = factory.create( gene_trees_file, new PhyloXmlParser() );
+        for( final Phylogeny gt : gene_trees ) {
+            bs++;
+            // Removes from gene_tree all species not found in species_tree.
+            PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
+            inferOrthologsHelper( gt, species_tree, query );
+            // System.out.println( bs );
+        }
+        setBootstraps( bs );
+        if ( RIO.TIME ) {
+            _time = ( System.currentTimeMillis() - _time );
+        }
+    }
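+
+    // Illustrative usage sketch (not part of the original source; the file
+    // name, species tree, and query name are placeholders):
+    //
+    //   final RIO rio = new RIO();
+    //   rio.inferOrthologs( new File( "gene_trees.xml" ), species_tree, "QUERY_SEQ" );
+    //   final HashMap<String, Integer> orthologs = rio.getInferredOrthologs( "QUERY_SEQ" );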
+
+    // Helper method which performs the actual ortholog inference for
+    // the external node with seqname query.
+    private void inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query ) {
+        Phylogeny assigned_tree = null;
+        List<PhylogenyNode> nodes = null;
+        final SDIR sdiunrooted = new SDIR();
+        List<PhylogenyNode> orthologs = null;
+        List<PhylogenyNode> super_orthologs = null;
+        List<PhylogenyNode> ultra_paralogs = null;
+        List<PhylogenyNode> subtree_neighbors = null;
+        assigned_tree = sdiunrooted.infer( gene_tree,
+                                           species_tree,
+                                           RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
+                                           RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
+                                           RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
+                                           true,
+                                           1 )[ 0 ];
+        setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
+        nodes = assigned_tree.getNodesViaSequenceName( query );
+        if ( nodes.size() > 1 ) {
+            throw new IllegalArgumentException( "node named [" + query + "] not unique" );
+        }
+        else if ( nodes.isEmpty() ) {
+            throw new IllegalArgumentException( "no node containing a sequence named [" + query + "] found" );
+        }
+        final PhylogenyNode query_node = nodes.get( 0 );
+        final PhylogenyMethods methods = PhylogenyMethods.getInstance();
+        orthologs = methods.getOrthologousNodes( assigned_tree, query_node );
+        updateHash( _o_hash_maps, query, orthologs );
+        super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
+        updateHash( _so_hash_maps, query, super_orthologs );
+        subtree_neighbors = getSubtreeNeighbors( query_node, 2 );
+        updateHash( _sn_hash_maps, query, subtree_neighbors );
+        ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
+        updateHash( _up_hash_maps, query, ultra_paralogs );
+    }
+
+    /**
+     * Returns an ArrayList containing the names of orthologs of the PhylogenyNode
+     * with seq name seq_name.
+     * 
+     * @param seq_name
+     *            sequence name of an external node of the gene trees
+     * @param threshold_orthologs
+     *            the minimal number of observations for a sequence to be
+     *            reported as orthologous, as a percentage (0.0-100.0%)
+     * @return ArrayList containing the names of orthologs of the PhylogenyNode
+     *         with seq name seq_name
+     */
+    public ArrayList<String> inferredOrthologsToArrayList( final String seq_name, double threshold_orthologs ) {
+        HashMap<String, Integer> o_hashmap = null;
+        String name = null;
+        double o = 0.0;
+        final ArrayList<String> arraylist = new ArrayList<String>();
+        if ( _o_hash_maps == null ) {
+            throw new RuntimeException( "Orthologs have not been calculated (successfully)." );
+        }
+        if ( threshold_orthologs < 0.0 ) {
+            threshold_orthologs = 0.0;
+        }
+        else if ( threshold_orthologs > 100.0 ) {
+            threshold_orthologs = 100.0;
+        }
+        o_hashmap = getInferredOrthologs( seq_name );
+        if ( o_hashmap == null ) {
+            throw new RuntimeException( "Orthologs for " + seq_name + " were not established." );
+        }
+        if ( _seq_names.length > 0 ) {
+            I: for( int i = 0; i < _seq_names.length; ++i ) {
+                name = _seq_names[ i ];
+                if ( name.equals( seq_name ) ) {
+                    continue I;
+                }
+                o = getBootstrapValueFromHash( o_hashmap, name );
+                if ( o < threshold_orthologs ) {
+                    continue I;
+                }
+                arraylist.add( name );
+            }
+        }
+        return arraylist;
+    }
+
+    /**
+     * Returns a String containing the names of orthologs of the PhylogenyNode
+     * with seq name query_name. The String also contains how many times a
+     * particular ortholog has been observed.
+     * <p>
+     * <ul>
+     * The output order is (per line): Name, Ortholog, Subtree neighbor, Super
+     * ortholog, Distance
+     * </ul>
+     * <p>
+     * The sort priority of this is determined by sort in the following manner:
+     * <ul>
+     * <li>0 : Ortholog
+     * <li>1 : Ortholog, Super ortholog
+     * <li>2 : Super ortholog, Ortholog
+     * <li>3 : Ortholog, Distance
+     * <li>4 : Distance, Ortholog
+     * <li>5 : Ortholog, Super ortholog, Distance
+     * <li>6 : Ortholog, Distance, Super ortholog
+     * <li>7 : Super ortholog, Ortholog, Distance
+     * <li>8 : Super ortholog, Distance, Ortholog
+     * <li>9 : Distance, Ortholog, Super ortholog
+     * <li>10 : Distance, Super ortholog, Ortholog
+     * <li>11 : Ortholog, Subtree neighbor, Distance
+     * <li>12 : Ortholog, Subtree neighbor, Super ortholog, Distance (default)
+     * <li>13 : Ortholog, Super ortholog, Subtree neighbor, Distance
+     * <li>14 : Subtree neighbor, Ortholog, Super ortholog, Distance
+     * <li>15 : Subtree neighbor, Distance, Ortholog, Super ortholog
+     * <li>16 : Ortholog, Distance, Subtree neighbor, Super ortholog
+     * <li>17 : Ortholog, Subtree neighbor, Distance, Super ortholog
+     * </ul>
+     * <p>
+     * Returns "-" if no putative orthologs have been found (given
+     * threshold_orthologs).
+     * <p>
+     * Orthologs are to be inferred by method "inferOrthologs".
+     * <p>
+     * (Last modified: 05/08/01)
+     * 
+     * @param query_name
+     *            sequence name of an external node of the gene trees
+     * @param sort
+     *            order and sort priority
+     * @param threshold_orthologs
+     *            the minimal number of observations for a sequence to be
+     *            reported as orthologous, in percent (0.0-100.0%)
+     * @param threshold_subtreeneighborings
+     *            the minimal number of observations for a sequence to be
+     *            reported as a subtree neighbor, in percent (0.0-100.0%)
+     * @return String containing the inferred orthologs, or "-" if no orthologs
+     *         have been found; null in case of error
+     * @see #inferOrthologs(File,Phylogeny,String)
+     * @see #inferOrthologs(Phylogeny[],Phylogeny)
+     * @see #inferOrthologs(File,Phylogeny)
+     * @see #getOrder(int)
+     */
+    public StringBuffer inferredOrthologsToString( final String query_name,
+                                                   int sort,
+                                                   double threshold_orthologs,
+                                                   double threshold_subtreeneighborings ) {
+        HashMap<String, Integer> o_hashmap = null;
+        HashMap<String, Integer> s_hashmap = null;
+        HashMap<String, Integer> n_hashmap = null;
+        String name = "";
+        double o = 0.0, // Orthologs.
+        s = 0.0, // Super orthologs.
+        sn = 0.0, // Subtree neighbors.
+        value1 = 0.0, value2 = 0.0, value3 = 0.0, value4 = 0.0, d = 0.0;
+        final ArrayList<Tuplet> nv = new ArrayList<Tuplet>();
+        if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) || ( _sn_hash_maps == null ) ) {
+            throw new RuntimeException( "Orthologs have not been calculated (successfully)" );
+        }
+        if ( ( sort < 0 ) || ( sort > 17 ) ) {
+            sort = 12;
+        }
+        if ( ( sort > 2 ) && ( _m == null ) && ( _l == null ) ) {
+            throw new RuntimeException( "Distance list or matrix have not been read in (successfully)" );
+        }
+        if ( threshold_orthologs < 0.0 ) {
+            threshold_orthologs = 0.0;
+        }
+        else if ( threshold_orthologs > 100.0 ) {
+            threshold_orthologs = 100.0;
+        }
+        if ( threshold_subtreeneighborings < 0.0 ) {
+            threshold_subtreeneighborings = 0.0;
+        }
+        else if ( threshold_subtreeneighborings > 100.0 ) {
+            threshold_subtreeneighborings = 100.0;
+        }
+        o_hashmap = getInferredOrthologs( query_name );
+        s_hashmap = getInferredSuperOrthologs( query_name );
+        n_hashmap = getInferredSubtreeNeighbors( query_name );
+        if ( ( o_hashmap == null ) || ( s_hashmap == null ) || ( n_hashmap == null ) ) {
+            throw new RuntimeException( "Orthologs for " + query_name + " were not established" );
+        }
+        final StringBuffer orthologs = new StringBuffer();
+        if ( _seq_names.length > 0 ) {
+            I: for( int i = 0; i < _seq_names.length; ++i ) {
+                name = _seq_names[ i ];
+                if ( name.equals( query_name ) ) {
+                    continue I;
+                }
+                o = getBootstrapValueFromHash( o_hashmap, name );
+                if ( o < threshold_orthologs ) {
+                    continue I;
+                }
+                sn = getBootstrapValueFromHash( n_hashmap, name );
+                if ( sn < threshold_subtreeneighborings ) {
+                    continue I;
+                }
+                s = getBootstrapValueFromHash( s_hashmap, name );
+                if ( sort >= 3 ) {
+                    if ( _m != null ) {
+                        d = getDistance( query_name, name );
+                    }
+                    else {
+                        d = getDistance( name );
+                    }
+                }
+                switch ( sort ) {
+                    case 0:
+                        nv.add( new Tuplet( name, o, 5 ) );
+                        break;
+                    case 1:
+                        nv.add( new Tuplet( name, o, s, 5 ) );
+                        break;
+                    case 2:
+                        nv.add( new Tuplet( name, s, o, 5 ) );
+                        break;
+                    case 3:
+                        nv.add( new Tuplet( name, o, d, 1 ) );
+                        break;
+                    case 4:
+                        nv.add( new Tuplet( name, d, o, 0 ) );
+                        break;
+                    case 5:
+                        nv.add( new Tuplet( name, o, s, d, 2 ) );
+                        break;
+                    case 6:
+                        nv.add( new Tuplet( name, o, d, s, 1 ) );
+                        break;
+                    case 7:
+                        nv.add( new Tuplet( name, s, o, d, 2 ) );
+                        break;
+                    case 8:
+                        nv.add( new Tuplet( name, s, d, o, 1 ) );
+                        break;
+                    case 9:
+                        nv.add( new Tuplet( name, d, o, s, 0 ) );
+                        break;
+                    case 10:
+                        nv.add( new Tuplet( name, d, s, o, 0 ) );
+                        break;
+                    case 11:
+                        nv.add( new Tuplet( name, o, sn, d, 2 ) );
+                        break;
+                    case 12:
+                        nv.add( new Tuplet( name, o, sn, s, d, 3 ) );
+                        break;
+                    case 13:
+                        nv.add( new Tuplet( name, o, s, sn, d, 3 ) );
+                        break;
+                    case 14:
+                        nv.add( new Tuplet( name, sn, o, s, d, 3 ) );
+                        break;
+                    case 15:
+                        nv.add( new Tuplet( name, sn, d, o, s, 1 ) );
+                        break;
+                    case 16:
+                        nv.add( new Tuplet( name, o, d, sn, s, 1 ) );
+                        break;
+                    case 17:
+                        nv.add( new Tuplet( name, o, sn, d, s, 2 ) );
+                        break;
+                    default:
+                        nv.add( new Tuplet( name, o, 5 ) );
+                }
+            } // End of I for loop.
+            if ( ( nv != null ) && ( nv.size() > 0 ) ) {
+                orthologs.append( "[seq name]\t\t[ortho]\t[st-n]\t[sup-o]\t[dist]" + ForesterUtil.LINE_SEPARATOR );
+                final Tuplet[] nv_array = new Tuplet[ nv.size() ];
+                for( int j = 0; j < nv.size(); ++j ) {
+                    nv_array[ j ] = nv.get( j );
+                }
+                Arrays.sort( nv_array );
+                for( int i = 0; i < nv_array.length; ++i ) {
+                    name = nv_array[ i ].getKey();
+                    value1 = nv_array[ i ].getValue1();
+                    value2 = nv_array[ i ].getValue2();
+                    value3 = nv_array[ i ].getValue3();
+                    value4 = nv_array[ i ].getValue4();
+                    orthologs.append( addNameAndValues( name, value1, value2, value3, value4, sort ) );
+                }
+            }
+        }
+        // No orthologs found.
+        if ( ( orthologs == null ) || ( orthologs.length() < 1 ) ) {
+            orthologs.append( "-" );
+        }
+        return orthologs;
+    } // inferredOrthologsToString( String, int, double, double )
+
+    // Helper method for inferredOrthologTableToFile.
+    // Returns individual rows for the table as String.
+    private String inferredOrthologsToTableHelper( final String name2,
+                                                   final String[] names,
+                                                   final int j,
+                                                   final boolean super_orthologs ) {
+        HashMap<String, Integer> hashmap = null;
+        String name = null, orthologs = new String( "" );
+        int value = 0;
+        if ( !super_orthologs ) {
+            hashmap = getInferredOrthologs( name2 );
+        }
+        else {
+            hashmap = getInferredSuperOrthologs( name2 );
+        }
+        if ( hashmap == null ) {
+            throw new RuntimeException( "Unexpected failure in method inferredOrthologsToTableHelper" );
+        }
+        for( int i = 0; i < names.length; ++i ) {
+            name = names[ i ];
+            if ( !hashmap.containsKey( name ) ) {
+                value = 0;
+            }
+            else {
+                value = hashmap.get( name );
+            }
+            if ( i == j ) {
+                // Sanity check.
+                if ( value != 0 ) {
+                    throw new RuntimeException( "Failed sanity check in method inferredOrthologsToTableHelper: value not 0." );
+                }
+                orthologs += ( " " + "\t" );
+            }
+            else {
+                orthologs += ( value + "\t" );
+            }
+        }
+        return orthologs;
+    }
+
+    /**
+     * Writes the orthologs for each external node of the gene trees to outfile
+     * in the form of a table. Orthologs are to be inferred by method
+     * "inferOrthologs". Overwrites without asking! (Last modified: 12/07/00)
+     * 
+     * @param outfile
+     *            the File to write to
+     */
+    public void inferredOrthologTableToFile( final File outfile ) throws IOException {
+        if ( _o_hash_maps == null ) {
+            return;
+        }
+        inferredOrthologTableToFile( outfile, false );
+    }
+
+    // Helper for inferredOrthologTableToFile(File).
+    // (Last modified: 11/28/00)
+    private void inferredOrthologTableToFile( final File outfile, final boolean super_orthologs ) throws IOException {
+        String name = "", line = "";
+        PrintWriter out = null;
+        if ( _seq_names == null ) {
+            throw new RuntimeException( "inferredOrthologTableToFile: seq_names_ is null." );
+        }
+        Arrays.sort( _seq_names );
+        out = new PrintWriter( new FileWriter( outfile ), true );
+        if ( out == null ) {
+            throw new RuntimeException( "inferredOrthologTableToFile: failure to create PrintWriter." );
+        }
+        line = "\t\t\t\t";
+        for( int i = 0; i < _seq_names.length; ++i ) {
+            line += ( i + ")\t" );
+        }
+        line += "\n";
+        out.println( line );
+        for( int i = 0; i < _seq_names.length; ++i ) {
+            name = _seq_names[ i ];
+            if ( name.length() < 8 ) {
+                line = i + ")\t" + name + "\t\t\t";
+            }
+            else if ( name.length() < 16 ) {
+                line = i + ")\t" + name + "\t\t";
+            }
+            else {
+                line = i + ")\t" + name + "\t";
+            }
+            line += inferredOrthologsToTableHelper( name, _seq_names, i, super_orthologs );
+            out.println( line );
+        }
+        out.close();
+    }
+
+    /**
+     * Writes the "super orthologs" for each external nodes of the gene trees to
+     * outfile in the form of a table. Super orthologs are to be inferred by
+     * method "inferOrthologs". Overwrites without asking!
+     * 
+     * @param outfile
+     *            the File to write to
+     */
+    public void inferredSuperOrthologTableToFile( final File outfile ) throws IOException {
+        if ( _so_hash_maps == null ) {
+            return;
+        }
+        inferredOrthologTableToFile( outfile, true );
+    }
+
+    /**
+     * Returns a String containing the names of the ultra paralogs of the
+     * PhylogenyNode with seq name query_name. The String also contains how
+     * many times a particular ultra paralog has been observed. Returns "-" if
+     * no putative ultra paralogs have been found (given
+     * threshold_ultra_paralogs).
+     * <p>
+     * Ultra paralogs are to be inferred by method "inferOrthologs".
+     * 
+     * @param query_name
+     *            sequence name of an external node of the gene trees
+     * @param return_dists
+     *            whether to also report distances to the query
+     * @param threshold_ultra_paralogs
+     *            between 1 and 100
+     * @return String containing the inferred ultra paralogs, or "-" if none
+     *         have been found
+     */
+    public String inferredUltraParalogsToString( final String query_name,
+                                                 final boolean return_dists,
+                                                 double threshold_ultra_paralogs ) {
+        HashMap<String, Integer> sp_hashmap = null;
+        String name = "", ultra_paralogs = "";
+        int sort = 0;
+        double sp = 0.0, value1 = 0.0, value2 = 0.0, d = 0.0;
+        final List<Tuplet> nv = new ArrayList<Tuplet>();
+        if ( threshold_ultra_paralogs < 1.0 ) {
+            threshold_ultra_paralogs = 1.0;
+        }
+        else if ( threshold_ultra_paralogs > 100.0 ) {
+            threshold_ultra_paralogs = 100.0;
+        }
+        if ( _up_hash_maps == null ) {
+            throw new RuntimeException( "Ultra paralogs have not been calculated (successfully)." );
+        }
+        if ( return_dists && ( _m == null ) && ( _l == null ) ) {
+            throw new RuntimeException( "Distance list or matrix have not been read in (successfully)." );
+        }
+        sp_hashmap = getInferredUltraParalogs( query_name );
+        if ( sp_hashmap == null ) {
+            throw new RuntimeException( "Ultra paralogs for " + query_name + " were not established" );
+        }
+        if ( _seq_names.length > 0 ) {
+            I: for( int i = 0; i < _seq_names.length; ++i ) {
+                name = _seq_names[ i ];
+                if ( name.equals( query_name ) ) {
+                    continue I;
+                }
+                sp = getBootstrapValueFromHash( sp_hashmap, name );
+                if ( sp < threshold_ultra_paralogs ) {
+                    continue I;
+                }
+                if ( return_dists ) {
+                    if ( _m != null ) {
+                        d = getDistance( query_name, name );
+                    }
+                    else {
+                        d = getDistance( name );
+                    }
+                    nv.add( new Tuplet( name, sp, d, 1 ) );
+                }
+                else {
+                    nv.add( new Tuplet( name, sp, 5 ) );
+                }
+            } // End of I for loop.
+            if ( ( nv != null ) && ( nv.size() > 0 ) ) {
+                final Tuplet[] nv_array = new Tuplet[ nv.size() ];
+                for( int j = 0; j < nv.size(); ++j ) {
+                    nv_array[ j ] = nv.get( j );
+                }
+                Arrays.sort( nv_array );
+                if ( return_dists ) {
+                    sort = 91;
+                }
+                else {
+                    sort = 90;
+                }
+                for( int i = 0; i < nv_array.length; ++i ) {
+                    name = nv_array[ i ].getKey();
+                    value1 = nv_array[ i ].getValue1();
+                    value2 = nv_array[ i ].getValue2();
+                    ultra_paralogs += addNameAndValues( name, value1, value2, 0.0, 0.0, sort );
+                }
+            }
+        }
+        // No ultra paralogs found.
+        if ( ( ultra_paralogs == null ) || ( ultra_paralogs.length() < 1 ) ) {
+            ultra_paralogs = "-";
+        }
+        return ultra_paralogs;
+    }
+
+    public final void readDistanceMatrix( final File matrix_file ) throws IOException {
+        DistanceMatrix[] matrices = null;
+        final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance();
+        matrices = parser.parse( matrix_file );
+        if ( ( matrices == null ) || ( matrices.length == 0 ) ) {
+            throw new IOException( "failed to parse distance matrix from [" + matrix_file + "]" );
+        }
+        if ( matrices.length > 1 ) {
+            throw new IOException( "[" + matrix_file + "] contains more than once distance matrix" );
+        }
+        _m = matrices[ 0 ];
+    }
+
+    /**
+     * Brings this into the same state as immediately after construction.
+     */
+    private final void reset() {
+        _o_hash_maps = null;
+        _so_hash_maps = null;
+        _up_hash_maps = null;
+        _seq_names = null;
+        _m = null;
+        _l = null;
+        _bootstraps = 1;
+        _ext_nodes_ = 0;
+        _time = 0;
+    }
+
+    /**
+     * Sets the number of trees analyzed.
+     * @param i
+     *            the number of trees analyzed
+     */
+    private void setBootstraps( int i ) {
+        if ( i < 1 ) {
+            i = 1;
+        }
+        _bootstraps = i;
+    }
+
+    /**
+     * Sets the number of external nodes in the gene trees analyzed (after stripping).
+     * @param i
+     *            the number of external nodes in the gene trees analyzed (after stripping)
+     */
+    private void setExtNodesOfAnalyzedGeneTrees( int i ) {
+        if ( i < 1 ) {
+            i = 0;
+        }
+        _ext_nodes_ = i;
+    }
+
+    // Helper for doInferOrthologs( Phylogeny, Phylogeny, String )
+    // and doInferOrthologs( Phylogeny, Phylogeny ).
+    private void updateHash( final HashMap<String, HashMap<String, Integer>> counter_map,
+                             final String query_seq_name,
+                             final List<PhylogenyNode> nodes ) {
+        final HashMap<String, Integer> hash_map = counter_map.get( query_seq_name );
+        if ( hash_map == null ) {
+            throw new RuntimeException( "Unexpected failure in method updateHash." );
+        }
+        for( int j = 0; j < nodes.size(); ++j ) {
+            final String seq_name = ( nodes.get( j ) ).getNodeData().getSequence().getName();
+            if ( hash_map.containsKey( seq_name ) ) {
+                hash_map.put( seq_name, hash_map.get( seq_name ) + 1 );
+            }
+            else {
+                hash_map.put( seq_name, 1 );
+            }
+        }
+    }
+
+    // Helper method for inferredOrthologsToString
+    // and inferredUltraParalogsToString.
+    private final static String addNameAndValues( final String name,
+                                                  final double value1,
+                                                  final double value2,
+                                                  final double value3,
+                                                  final double value4,
+                                                  final int sort ) {
+        final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#####" );
+        df.setDecimalSeparatorAlwaysShown( false );
+        String line = "";
+        if ( name.length() < 8 ) {
+            line += ( name + "\t\t\t" );
+        }
+        else if ( name.length() < 16 ) {
+            line += ( name + "\t\t" );
+        }
+        else {
+            line += ( name + "\t" );
+        }
+        switch ( sort ) {
+            case 0:
+                line += addToLine( value1, df );
+                line += "-\t";
+                line += "-\t";
+                line += "-\t";
+                break;
+            case 1:
+                line += addToLine( value1, df );
+                line += "-\t";
+                line += addToLine( value2, df );
+                line += "-\t";
+                break;
+            case 2:
+                line += addToLine( value2, df );
+                line += "-\t";
+                line += addToLine( value1, df );
+                line += "-\t";
+                break;
+            case 3:
+                line += addToLine( value1, df );
+                line += "-\t";
+                line += "-\t";
+                line += addToLine( value2, df );
+                break;
+            case 4:
+                line += addToLine( value2, df );
+                line += "-\t";
+                line += "-\t";
+                line += addToLine( value1, df );
+                break;
+            case 5:
+                line += addToLine( value1, df );
+                line += "-\t";
+                line += addToLine( value2, df );
+                line += addToLine( value3, df );
+                break;
+            case 6:
+                line += addToLine( value1, df );
+                line += "-\t";
+                line += addToLine( value3, df );
+                line += addToLine( value2, df );
+                break;
+            case 7:
+                line += addToLine( value2, df );
+                line += "-\t";
+                line += addToLine( value1, df );
+                line += addToLine( value3, df );
+                break;
+            case 8:
+                line += addToLine( value3, df );
+                line += "-\t";
+                line += addToLine( value1, df );
+                line += addToLine( value2, df );
+                break;
+            case 9:
+                line += addToLine( value2, df );
+                line += "-\t";
+                line += addToLine( value3, df );
+                line += addToLine( value1, df );
+                break;
+            case 10:
+                line += addToLine( value3, df );
+                line += "-\t";
+                line += addToLine( value2, df );
+                line += addToLine( value1, df );
+                break;
+            case 11:
+                line += addToLine( value1, df );
+                line += addToLine( value2, df );
+                line += "-\t";
+                line += addToLine( value3, df );
+                break;
+            case 12:
+                line += addToLine( value1, df );
+                line += addToLine( value2, df );
+                line += addToLine( value3, df );
+                line += addToLine( value4, df );
+                break;
+            case 13:
+                line += addToLine( value1, df );
+                line += addToLine( value3, df );
+                line += addToLine( value2, df );
+                line += addToLine( value4, df );
+                break;
+            case 14:
+                line += addToLine( value2, df );
+                line += addToLine( value1, df );
+                line += addToLine( value3, df );
+                line += addToLine( value4, df );
+                break;
+            case 15:
+                line += addToLine( value3, df );
+                line += addToLine( value1, df );
+                line += addToLine( value4, df );
+                line += addToLine( value2, df );
+                break;
+            case 16:
+                line += addToLine( value1, df );
+                line += addToLine( value3, df );
+                line += addToLine( value4, df );
+                line += addToLine( value2, df );
+                break;
+            case 17:
+                line += addToLine( value1, df );
+                line += addToLine( value2, df );
+                line += addToLine( value4, df );
+                line += addToLine( value3, df );
+                break;
+            case 90:
+                line += addToLine( value1, df );
+                line += "-\t";
+                break;
+            case 91:
+                line += addToLine( value1, df );
+                line += addToLine( value2, df );
+                break;
+        }
+        line += ForesterUtil.LINE_SEPARATOR;
+        return line;
+    }
+
+    // Helper for addNameAndValues.
+    private final static String addToLine( final double value, final java.text.DecimalFormat df ) {
+        String s = "";
+        if ( value != Tuplet.DEFAULT ) {
+            s = df.format( value ) + "\t";
+        }
+        else {
+            s = "-\t";
+        }
+        return s;
+    }
+
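+    // Collects the sequence names of all external nodes of phy (in forward
+    // external-node order); returns null if phy is empty.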
+    private static String[] getAllExternalSequenceNames( final Phylogeny phy ) {
+        if ( phy.isEmpty() ) {
+            return null;
+        }
+        int i = 0;
+        final String[] names = new String[ phy.getNumberOfExternalNodes() ];
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            names[ i++ ] = iter.next().getNodeData().getSequence().getName();
+        }
+        return names;
+    }
+
+    /**
+     * Returns the order in which ortholog (o), "super ortholog" (s) and
+     * distance (d) are returned and sorted (priority of sort always goes from
+     * left to right), given sort. For the meaning of sort, see:
+     * 
+     * @see #inferredOrthologsToString(String,int,double,double)
+     *      
+     * @param sort
+     *            determines order and sort priority
+     * @return String indicating the order
+     */
+    public final static String getOrder( final int sort ) {
+        String order = "";
+        switch ( sort ) {
+            case 0:
+                order = "orthologies";
+                break;
+            case 1:
+                order = "orthologies > super orthologies";
+                break;
+            case 2:
+                order = "super orthologies > orthologies";
+                break;
+            case 3:
+                order = "orthologies > distance to query";
+                break;
+            case 4:
+                order = "distance to query > orthologies";
+                break;
+            case 5:
+                order = "orthologies > super orthologies > distance to query";
+                break;
+            case 6:
+                order = "orthologies > distance to query > super orthologies";
+                break;
+            case 7:
+                order = "super orthologies > orthologies > distance to query";
+                break;
+            case 8:
+                order = "super orthologies > distance to query > orthologies";
+                break;
+            case 9:
+                order = "distance to query > orthologies > super orthologies";
+                break;
+            case 10:
+                order = "distance to query > super orthologies > orthologies";
+                break;
+            case 11:
+                order = "orthologies > subtree neighbors > distance to query";
+                break;
+            case 12:
+                order = "orthologies > subtree neighbors > super orthologies > distance to query";
+                break;
+            case 13:
+                order = "orthologies > super orthologies > subtree neighbors > distance to query";
+                break;
+            case 14:
+                order = "subtree neighbors > orthologies > super orthologies > distance to query";
+                break;
+            case 15:
+                order = "subtree neighbors > distance to query > orthologies > super orthologies";
+                break;
+            case 16:
+                order = "orthologies > distance to query > subtree neighbors > super orthologies";
+                break;
+            case 17:
+                order = "orthologies > subtree neighbors > distance to query > super orthologies";
+                break;
+            default:
+                order = "orthologies";
+                break;
+        }
+        return order;
+    }
+
+    public final static StringBuffer getOrderHelp() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "  0: orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  3: orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  4: distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  5: orthologies > super orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  6: orthologies > distance to query > super orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  7: super orthologies > orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  8: super orthologies > distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( "  9: distance to query > orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 10: distance to query > super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 11: orthologies > subtree neighbors > distance to query" + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 12: orthologies > subtree neighbors > super orthologies > distance to query"
+                + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 13: orthologies > super orthologies > subtree neighbors > distance to query"
+                + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 14: subtree neighbors > orthologies > super orthologies > distance to query"
+                + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 15: subtree neighbors > distance to query > orthologies > super orthologies"
+                + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 16: orthologies > distance to query > subtree neighbors > super orthologies"
+                + ForesterUtil.LINE_SEPARATOR );
+        sb.append( " 17: orthologies > subtree neighbors > distance to query > super orthologies"
+                + ForesterUtil.LINE_SEPARATOR );
+        return sb;
+    }
+
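+    // Returns the external descendants of the query's grandparent ("level 2"
+    // subtree neighbors, the only level currently supported), excluding the
+    // query node itself; returns null if query is not external.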
+    private final static List<PhylogenyNode> getSubtreeNeighbors( final PhylogenyNode query, final int level ) {
+        PhylogenyNode node = query;
+        if ( !node.isExternal() ) {
+            return null;
+        }
+        if ( !node.isRoot() ) {
+            node = node.getParent();
+        }
+        if ( level == 2 ) {
+            if ( !node.isRoot() ) {
+                node = node.getParent();
+            }
+        }
+        else {
+            throw new IllegalArgumentException( "currently only supporting level 2 subtree neighbors " );
+        }
+        final List<PhylogenyNode> sn = node.getAllExternalDescendants();
+        sn.remove( query );
+        return sn;
+    }
+}
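A minimal usage sketch of the reporting methods defined in the class above, assuming an instance (named rio here; the enclosing class itself is declared outside this hunk and is referred to by the placeholder name RIO) on which the ortholog inference referenced in the javadoc ("inferOrthologs") has already been run. The query name, sort code, thresholds, and file names are arbitrary examples.

    // Sketch only; RIO is a placeholder for the enclosing class.
    // Given: RIO rio = <instance on which "inferOrthologs" has been run>;
    System.out.print( RIO.getOrderHelp() );   // legend for the sort codes
    System.out.println( RIO.getOrder( 5 ) );  // "orthologies > super orthologies > distance to query"
    System.out.println( rio.inferredOrthologsToString( "QUERY_SEQ", 5, 50.0, 50.0 ) );
    System.out.println( rio.inferredUltraParalogsToString( "QUERY_SEQ", false, 50.0 ) );
    rio.inferredOrthologTableToFile( new File( "ortholog_table.txt" ) );             // overwrites; may throw IOException
    rio.inferredSuperOrthologTableToFile( new File( "super_ortholog_table.txt" ) );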
diff --git a/forester/java/src/org/forester/sdi/RIOn.java b/forester/java/src/org/forester/sdi/RIOn.java
new file mode 100644 (file)
index 0000000..7855473
--- /dev/null
@@ -0,0 +1,132 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.GeneralTable;
+
+public class RIOn {
+
+    private final static boolean  ROOT_BY_MINIMIZING_MAPPING_COST = false;
+    private final static boolean  ROOT_BY_MINIMIZING_SUM_OF_DUPS  = true;
+    private final static boolean  ROOT_BY_MINIMIZING_TREE_HEIGHT  = true;
+    GeneralTable<String, Integer> _orthologs                      = null;
+    GeneralTable<String, Integer> _paralogs                       = null;
+    GeneralTable<String, Integer> _super_orthologs                = null;
+    GeneralTable<String, Integer> _ultra_paralogs                 = null;
+
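+    // Roots the gene tree against the species tree with SDIR (minimizing the
+    // sum of duplications and the tree height), then, for every ordered pair
+    // of external nodes, looks up the duplication/speciation assignment of
+    // their LCA and increments the corresponding counter table.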
+    private void doInferOrthologs( final Phylogeny gene_tree, final Phylogeny species_tree ) {
+        final SDIR sdiunrooted = new SDIR();
+        final Phylogeny assigned_tree = sdiunrooted.infer( gene_tree,
+                                                           species_tree,
+                                                           ROOT_BY_MINIMIZING_MAPPING_COST,
+                                                           ROOT_BY_MINIMIZING_SUM_OF_DUPS,
+                                                           ROOT_BY_MINIMIZING_TREE_HEIGHT,
+                                                           true,
+                                                           1 )[ 0 ];
+        final List<PhylogenyNode> external_nodes = new ArrayList<PhylogenyNode>();
+        for( final PhylogenyNodeIterator iterator = assigned_tree.iteratorExternalForward(); iterator.hasNext(); ) {
+            external_nodes.add( iterator.next() );
+        }
+        final PhylogenyMethods methods = PhylogenyMethods.getInstance();
+        for( int i = 0; i < external_nodes.size(); ++i ) {
+            for( int j = 0; j < external_nodes.size(); ++j ) {
+                if ( i != j ) {
+                    final PhylogenyNode node_i = external_nodes.get( i );
+                    final PhylogenyNode node_j = external_nodes.get( j );
+                    final PhylogenyNode lca = methods.obtainLCA( node_i, node_j );
+                    final Event event = lca.getNodeData().getEvent();
+                    final String node_i_name = node_i.getNodeData().getSequence().getName();
+                    final String node_j_name = node_j.getNodeData().getSequence().getName();
+                    if ( event.isDuplication() ) {
+                        increaseCounter( getOrthologs(), node_i_name, node_j_name );
+                    }
+                    else {
+                        increaseCounter( getParalogs(), node_i_name, node_j_name );
+                    }
+                }
+            }
+        }
+    }
+
+    public GeneralTable<String, Integer> getOrthologs() {
+        return _orthologs;
+    }
+
+    public GeneralTable<String, Integer> getParalogs() {
+        return _paralogs;
+    }
+
+    public GeneralTable<String, Integer> getSuperOrthologs() {
+        return _super_orthologs;
+    }
+
+    public GeneralTable<String, Integer> getUltraParalogs() {
+        return _ultra_paralogs;
+    }
+
+    private void increaseCounter( final GeneralTable<String, Integer> table,
+                                  final String node_i_name,
+                                  final String node_j_name ) {
+        final Integer value = table.getValue( node_i_name, node_j_name );
+        if ( value == null ) {
+            table.setValue( node_i_name, node_j_name, 1 );
+        }
+        else {
+            table.setValue( node_i_name, node_j_name, value.intValue() + 1 );
+        }
+    }
+
+    private void init() {
+        _orthologs = new GeneralTable<String, Integer>();
+        _paralogs = new GeneralTable<String, Integer>();
+        _super_orthologs = new GeneralTable<String, Integer>();
+        _ultra_paralogs = new GeneralTable<String, Integer>();
+    }
+
+    private void setOrthologs( final GeneralTable<String, Integer> orthologs ) {
+        _orthologs = orthologs;
+    }
+
+    private void setParalogs( final GeneralTable<String, Integer> paralogs ) {
+        _paralogs = paralogs;
+    }
+
+    private void setSuperOrthologs( final GeneralTable<String, Integer> super_orthologs ) {
+        _super_orthologs = super_orthologs;
+    }
+
+    private void setUltraParalogs( final GeneralTable<String, Integer> ultra_paralogs ) {
+        _ultra_paralogs = ultra_paralogs;
+    }
+}
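A small sketch of the counting pattern implemented by increaseCounter above, using org.forester.util.GeneralTable directly; only the no-argument constructor and the getValue/setValue calls already appearing in this file are assumed, and the sequence names are arbitrary examples.

    final GeneralTable<String, Integer> counts = new GeneralTable<String, Integer>();
    // Record the pair ( "seqA", "seqB" ) twice, as increaseCounter would.
    for( int k = 0; k < 2; ++k ) {
        final Integer value = counts.getValue( "seqA", "seqB" );
        if ( value == null ) {
            counts.setValue( "seqA", "seqB", 1 );
        }
        else {
            counts.setValue( "seqA", "seqB", value.intValue() + 1 );
        }
    }
    // counts.getValue( "seqA", "seqB" ) now returns 2.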
diff --git a/forester/java/src/org/forester/sdi/SDI.java b/forester/java/src/org/forester/sdi/SDI.java
new file mode 100644 (file)
index 0000000..b21b9b5
--- /dev/null
@@ -0,0 +1,318 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.sdi;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+public abstract class SDI {
+
+    final Phylogeny _gene_tree;
+    final Phylogeny _species_tree;
+    int             _duplications_sum; // Sum of duplications.
+    int             _mapping_cost;    // Mapping cost "L".
+
+    /**
+     * Constructor which sets the gene tree and the species tree to be compared.
+     * species_tree is the species tree to which the gene tree gene_tree will be
+     * compared.
+     * Infers for each PhylogenyNode of gene_tree whether
+     * it represents a speciation or duplication event by calculating and
+     * interpreting the mapping function M. The most parsimonious sequence of
+     * speciation and duplication events is assumed.
+     * The mapping cost L can be
+     * calculated with method "computeMappingCostL()".
+     * <p>
+     * Conditions:
+     * </p>
+     * <ul>
+     * <li>Both Trees must be rooted
+     * <li>Both Trees must have species names in the species name fields of all
+     * their external nodes
+     * </ul>
+     * 
+     * @param gene_tree
+     *            reference to a rooted binary gene Phylogeny to which assign
+     *            duplication vs speciation, must have species names in the
+     *            species name fields for all external nodes
+     * @param species_tree
+     *            reference to a rooted binary species Phylogeny which might get
+     *            stripped in the process, must have species names in the
+     *            species name fields for all external nodes
+     */
+    public SDI( final Phylogeny gene_tree, final Phylogeny species_tree ) {
+        if ( species_tree.isEmpty() || gene_tree.isEmpty() ) {
+            throw new IllegalArgumentException( "attempt to infer duplications using empty tree(s)" );
+        }
+        if ( !gene_tree.isRooted() ) {
+            throw new IllegalArgumentException( "attempt to infer duplications on unrooted gene tree" );
+        }
+        if ( !species_tree.isRooted() ) {
+            throw new IllegalArgumentException( "attempt to infer duplications on unrooted species tree" );
+        }
+        _gene_tree = gene_tree;
+        _species_tree = species_tree;
+        _duplications_sum = 0;
+        _mapping_cost = -1;
+    }
+
+    // Helper method for "computeMappingCost()".
+    private void computeMappingCostHelper( final PhylogenyNode g ) {
+        if ( !g.isExternal() ) {
+            computeMappingCostHelper( g.getChildNode1() );
+            computeMappingCostHelper( g.getChildNode2() );
+            if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) {
+                _mapping_cost += ( g.getChildNode1().getLink().getId() + g.getChildNode2().getLink().getId()
+                        - ( 2 * g.getLink().getId() ) - 2 );
+            }
+            else if ( ( g.getLink() != g.getChildNode1().getLink() ) && ( g.getLink() == g.getChildNode2().getLink() ) ) {
+                _mapping_cost += ( g.getChildNode1().getLink().getId() - g.getLink().getId() + 1 );
+            }
+            else if ( ( g.getLink() == g.getChildNode1().getLink() ) && ( g.getLink() != g.getChildNode2().getLink() ) ) {
+                _mapping_cost += ( g.getChildNode2().getLink().getId() - g.getLink().getId() + 1 );
+            }
+            else {
+                _mapping_cost++;
+            }
+        }
+    }
+
+    /**
+     * Computes the cost of mapping the gene tree gene_tree onto the species
+     * tree species_tree. Before this method can be called, the mapping has to
+     * be calculated with method "infer(boolean)".
+     * <p>
+     * Reference: Zhang, L. (1997): On a Mirkin-Muchnik-Smith Conjecture for
+     * Comparing Molecular Phylogenies. Journal of Computational Biology 4:
+     * 177-187.
+     * 
+     * @return the mapping cost "L"
+     */
+    public int computeMappingCostL() {
+        _species_tree.levelOrderReID();
+        _mapping_cost = 0;
+        computeMappingCostHelper( _gene_tree.getRoot() );
+        return _mapping_cost;
+    }
+
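+    // Determines which taxonomy field - identifier, taxonomy code, scientific
+    // name, or common name (checked in that order of preference) - is present
+    // on all external nodes of both the species tree and the gene tree, and
+    // returns it as the base for comparing taxonomies; throws an
+    // IllegalArgumentException if no field is shared by all nodes.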
+    private TaxonomyComparisonBase determineTaxonomyComparisonBase() {
+        TaxonomyComparisonBase base = null;
+        boolean all_have_id = true;
+        boolean all_have_code = true;
+        boolean all_have_sn = true;
+        boolean all_have_cn = true;
+        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.getNodeData().isHasTaxonomy() ) {
+                final Taxonomy tax = n.getNodeData().getTaxonomy();
+                if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
+                    all_have_id = false;
+                }
+                if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+                    all_have_code = false;
+                }
+                if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+                    all_have_sn = false;
+                }
+                if ( ForesterUtil.isEmpty( tax.getCommonName() ) ) {
+                    all_have_cn = false;
+                }
+            }
+            else {
+                throw new IllegalArgumentException( "species tree node [" + n + "] has no taxonomic data" );
+            }
+        }
+        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.getNodeData().isHasTaxonomy() ) {
+                final Taxonomy tax = n.getNodeData().getTaxonomy();
+                if ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) {
+                    all_have_id = false;
+                }
+                if ( ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+                    all_have_code = false;
+                }
+                if ( ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+                    all_have_sn = false;
+                }
+                if ( ForesterUtil.isEmpty( tax.getCommonName() ) ) {
+                    all_have_cn = false;
+                }
+            }
+            else {
+                throw new IllegalArgumentException( "gene tree node [" + n + "] has no taxonomic data" );
+            }
+        }
+        if ( all_have_id ) {
+            base = TaxonomyComparisonBase.ID;
+        }
+        else if ( all_have_code ) {
+            base = TaxonomyComparisonBase.CODE;
+        }
+        else if ( all_have_sn ) {
+            base = TaxonomyComparisonBase.SCIENTIFIC_NAME;
+        }
+        else if ( all_have_cn ) {
+            base = TaxonomyComparisonBase.COMMON_NAME;
+        }
+        else {
+            throw new IllegalArgumentException( "gene tree and species tree have incomparable taxonomies" );
+        }
+        return base;
+    }
+
+    /**
+     * Returns the number of duplications.
+     * 
+     * @return number of duplications
+     */
+    public int getDuplicationsSum() {
+        return _duplications_sum;
+    }
+
+    /**
+     * Returns the gene tree.
+     * 
+     * @return gene tree
+     */
+    public Phylogeny getGeneTree() {
+        return _gene_tree;
+    }
+
+    /**
+     * Returns the species tree.
+     * 
+     * @return species tree
+     */
+    public Phylogeny getSpeciesTree() {
+        return _species_tree;
+    }
+
+    /**
+     * Calculates the mapping function for the external nodes of the gene tree:
+     * links (sets the field "link" of PhylogenyNode) each external
+     * PhylogenyNode of gene_tree to the external PhylogenyNode of species_tree
+     * which has the same species name.
+     */
+    void linkNodesOfG() {
+        final Map<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
+        final TaxonomyComparisonBase tax_comp_base = determineTaxonomyComparisonBase();
+        // Put references to all external nodes of the species tree into a map.
+        // Stringified taxonomy is the key, node is the value.
+        for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode s = iter.next();
+            final String tax_str = taxonomyToString( s, tax_comp_base );
+            if ( speciestree_ext_nodes.containsKey( tax_str ) ) {
+                throw new IllegalArgumentException( "taxonomy [" + s.getNodeData().getTaxonomy()
+                        + "] is not unique in species phylogeny" );
+            }
+            speciestree_ext_nodes.put( tax_str, s );
+        }
+        // Retrieve the reference to the node with a matching stringified taxonomy.
+        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode g = iter.next();
+            final String tax_str = taxonomyToString( g, tax_comp_base );
+            final PhylogenyNode s = speciestree_ext_nodes.get( tax_str );
+            if ( s == null ) {
+                throw new IllegalArgumentException( "taxonomy [" + g.getNodeData().getTaxonomy()
+                        + "] not present in species tree" );
+            }
+            g.setLink( s );
+        }
+    }
+
+    /**
+     * Calculates the mapping function for the external nodes of the gene tree:
+     * links (sets the field "link" of PhylogenyNode) each external
+     * PhylogenyNode of gene_tree, by taxonomy identifier, to the external
+     * PhylogenyNode of species_tree which has the same taxonomy identifier.
+     * Olivier CHABROL : olivier.chabrol@univ-provence.fr
+     */
+    void linkNodesOfGByTaxonomyIdentifier() {
+        final HashMap<String, PhylogenyNode> speciestree_ext_nodes = new HashMap<String, PhylogenyNode>();
+        if ( _species_tree.getFirstExternalNode().isRoot() ) {
+            speciestree_ext_nodes.put( _species_tree.getFirstExternalNode().getNodeData().getTaxonomy().getIdentifier()
+                    .getValue(), _species_tree.getFirstExternalNode() );
+        }
+        else {
+            for( final PhylogenyNodeIterator iter = _species_tree.iteratorExternalForward(); iter.hasNext(); ) {
+                final PhylogenyNode s = iter.next();
+                speciestree_ext_nodes.put( s.getNodeData().getTaxonomy().getIdentifier().getValue(), s );
+            }
+        }
+        for( final PhylogenyNodeIterator iter = _gene_tree.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode g = iter.next();
+            final PhylogenyNode s = speciestree_ext_nodes
+                    .get( g.getNodeData().getTaxonomy().getIdentifier().getValue() );
+            if ( s == null ) {
+                String message = "species [" + g.getNodeData().getTaxonomy().getIdentifier().getValue();
+                message += "] not present in species tree";
+                throw new IllegalArgumentException( message );
+            }
+            g.setLink( s );
+        }
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getClass() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        sb.append( "Duplications sum                   : " + getDuplicationsSum() );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        sb.append( "mapping cost L                     : " + computeMappingCostL() );
+        return sb.toString();
+    }
+
+    private static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) {
+        final Taxonomy tax = n.getNodeData().getTaxonomy();
+        switch ( base ) {
+            case ID:
+                return tax.getIdentifier().getValue();
+            case CODE:
+                return tax.getTaxonomyCode();
+            case SCIENTIFIC_NAME:
+                return tax.getScientificName();
+            case COMMON_NAME:
+                return tax.getCommonName();
+            default:
+                throw new IllegalArgumentException( "unknown comparison base for taxonomies: " + base );
+        }
+    }
+
+    enum TaxonomyComparisonBase {
+        ID, CODE, SCIENTIFIC_NAME, COMMON_NAME;
+    }
+}
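A minimal sketch of driving a concrete subclass of SDI. SDIse and its two-argument constructor are taken from their use in SDIR below (where the duplication sum is read immediately after construction); gene_tree and species_tree are assumed to be rooted binary Phylogeny objects whose external nodes carry comparable taxonomy data, as required by the constructor's javadoc above.

    final SDI sdi = new SDIse( gene_tree, species_tree ); // assigns duplication vs. speciation
    final int duplications = sdi.getDuplicationsSum();
    final int mapping_cost = sdi.computeMappingCostL();   // Zhang (1997) mapping cost "L"
    System.out.println( sdi );                            // toString() reports both values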
diff --git a/forester/java/src/org/forester/sdi/SDIR.java b/forester/java/src/org/forester/sdi/SDIR.java
new file mode 100644 (file)
index 0000000..f75f6f8
--- /dev/null
@@ -0,0 +1,579 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyBranch;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+/*
+ * Allows the inference of duplications - speciations on an unrooted gene tree. It
+ * reroots the gene tree on each of its branches and performs SDIse on each of
+ * the resulting trees. Trees which minimize a certain criterion are returned as
+ * the "correctly" rooted ones. The criteria are: <ul> <li>Sum of duplications
+ * <li>Mapping cost L <li>Phylogeny height - which is the largest distance from
+ * root to external node (minimizing of which is the same as "midpoint rooting")
+ * </ul>
+ * 
+ * @see SDIse
+ * 
+ * @see SDI
+ * 
+ * @author Christian M. Zmasek
+ */
+public class SDIR {
+
+    private final static double ZERO_DIFF = 1.0E-6; // Due to floating point
+    // inaccuracies, not
+    // everything that should
+    // be 0.0 is exactly 0.0.
+    private int                 _count;
+    private int                 _min_dup;
+    private int                 _min_cost;
+    private double              _min_height;
+    private double              _min_diff;
+    private long                _time_sdi;
+
+    /**
+     * Default constructor which creates an "empty" object.
+     */
+    public SDIR() {
+        init();
+    }
+
+    /**
+     * Returns the number of differently rooted trees which minimize the
+     * (rooting) "criterion" - as determined by method "infer".
+     * 
+     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int,boolean)
+     * @return number of differently rooted trees which minimized the criterion
+     */
+    public int getCount() {
+        return _count;
+    }
+
+    /**
+     * Returns the (absolute value of the) minimal difference in tree heights of
+     * the two subtrees at the root (of the (re)rooted gene tree) - as
+     * determined by method "infer" - if minimize_height is set to true.
+     * <p>
+     * If a tree is midpoint rooted this number is zero.
+     * <p>
+     * <B>IMPORTANT </B>: If minimize_mapping_cost or minimize_sum_of_dup are
+     * also set to true, then this returns the minimal difference in tree
+     * heights of the trees which minimize the first criterion, and is therefore
+     * not necessarily zero.
+     * <p>
+     * (Last modified: 01/22/00)
+     * 
+     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int,boolean)
+     * @return the minimal difference in tree heights -- IF calculated by
+     *         "infer"
+     */
+    public double getMinimalDiffInSubTreeHeights() {
+        return _min_diff;
+    }
+
+    /**
+     * Returns the minimal number of duplications - as determined by method
+     * "infer".
+     * <p>
+     * <B>IMPORTANT </B>: If the tree is not rooted by minimizing the sum of
+     * duplications or the mapping cost L, then this number is NOT NECESSARILY
+     * the MINIMAL number of duplications.
+     * 
+     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int,boolean)
+     * @return (minimal) number of duplications
+     */
+    public int getMinimalDuplications() {
+        return _min_dup;
+    }
+
+    /**
+     * Returns the minimal mapping cost L - as determined by method "infer" - if
+     * minimize_mapping_cost is set to true.
+     * <p>
+     * (Last modified: 11/07/00)
+     * 
+     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int,boolean)
+     * @return the minimal mapping cost "L" -- IF calculated by "infer"
+     */
+    public int getMinimalMappingCost() {
+        return _min_cost;
+    }
+
+    /**
+     * Returns the minimal tree height - as determined by method "infer" - if
+     * minimize_height is set to true. <B>IMPORTANT </B>: If
+     * minimize_mapping_cost or minimize_sum_of_dup are also set to true, then
+     * this returns the minimal tree height of the trees which minimize the
+     * first criterion.
+     * <p>
+     * (Last modified: 01/12/00)
+     * 
+     * @see #infer(Phylogeny,Phylogeny,boolean,boolean,boolean,boolean,int,boolean)
+     * @return the minimal tree height -- IF calculated by "infer"
+     */
+    public double getMinimalTreeHeight() {
+        return _min_height;
+    }
+
+    /**
+     * Returns the sum of times (in ms) needed to run method infer of class SDI.
+     * Final variable TIME needs to be set to true.
+     * 
+     * @return sum of times (in ms) needed to run method infer of class SDI
+     */
+    public long getTimeSumSDI() {
+        return _time_sdi;
+    }
+
+    /**
+     * Infers gene duplications on a possibly unrooted gene Phylogeny gene_tree.
+     * The tree is rooted by minimizing either the sum of duplications, the
+     * mapping cost L, or the tree height (or combinations thereof). If
+     * return_trees is set to true, it returns an array of possibly more than
+     * one differently rooted Trees. <br>
+     * The maximal number of returned trees is set with max_trees_to_return.
+     * <br>
+     * Phylogeny species_tree is a species Phylogeny to which the gene Phylogeny
+     * gene_tree is compared to. <br>
+     * If both minimize_sum_of_dup and minimize_mapping_cost are true, the tree
+     * is rooted by minimizing the mapping cost L.<br>
+     * If minimize_sum_of_dup, minimize_mapping_cost, and minimize_height are
+     * false, tree gene_tree is assumed to be already rooted and no attempts at
+     * rooting are made, and only one tree is returned. <br>
+     * <p>
+     * Conditions:
+     * </p>
+     * <ul>
+     * <li>Both Trees must be completely binary (except deepest node of gene
+     * tree)
+     * <li>The species Phylogeny must be rooted
+     * <li>Both Trees must have species names in the species name fields of
+     * their nodes
+     * <li>Both Trees must not have any collapsed nodes
+     * </ul>
+     * <p>
+     * (Last modified: 10/01/01)
+     * 
+     * @param gene_tree
+     *            a binary (except deepest node) gene Phylogeny
+     * @param species_tree
+     *            a rooted binary species Phylogeny
+     * @param minimize_mapping_cost
+     *            set to true to root by minimizing the mapping cost L (and also
+     *            the sum of duplications)
+     * @param minimize_sum_of_dup
+     *            set to true to root by minimizing the sum of duplications
+     * @param minimize_height
+     *            set to true to root by minimizing the tree height - if
+     *            minimize_mapping_cost is set to true or minimize_sum_of_dup is
+     *            set to true, then out of the resulting trees with minimal
+     *            mapping cost or minimal number of duplications the tree with
+     *            the minimal height is chosen
+     * @param return_trees
+     *            set to true to return Array of Trees, otherwise null is
+     *            returned
+     * @param max_trees_to_return
+     *            maximal number of Trees to return (=maximal size of returned
+     *            Array) must be no lower than 1
+     * @return array of rooted Trees with duplication vs. speciation assigned if
+     *         return_trees is set to true, null otherwise
+     */
+    public Phylogeny[] infer( final Phylogeny gene_tree,
+                              final Phylogeny species_tree,
+                              final boolean minimize_mapping_cost,
+                              boolean minimize_sum_of_dup,
+                              final boolean minimize_height,
+                              final boolean return_trees,
+                              int max_trees_to_return ) {
+        init();
+        SDIse sdise = null;
+        final ArrayList<Phylogeny> trees = new ArrayList<Phylogeny>();
+        Phylogeny[] tree_array = null;
+        List<PhylogenyBranch> branches = null;
+        Phylogeny g = null;
+        PhylogenyNode prev_root = null;
+        PhylogenyNode prev_root_c1 = null;
+        PhylogenyNode prev_root_c2 = null;
+        int duplications = 0;
+        int cost = 0;
+        int counter = 0;
+        int min_duplications = Integer.MAX_VALUE;
+        int min_cost = Integer.MAX_VALUE;
+        int j = 0;
+        double height = 0.0;
+        double diff = 0.0;
+        double min_height = Double.MAX_VALUE;
+        double min_diff = 0.0;
+        double[] height__diff = new double[ 2 ];
+        boolean smaller = false;
+        boolean equal = false;
+        boolean prev_root_was_dup = false;
+        if ( max_trees_to_return < 1 ) {
+            max_trees_to_return = 1;
+        }
+        if ( minimize_mapping_cost && minimize_sum_of_dup ) {
+            minimize_sum_of_dup = false;
+        }
+        if ( !minimize_mapping_cost && !minimize_sum_of_dup && !minimize_height ) {
+            throw new IllegalArgumentException( "parameter to minimize not given for rooting of phylogeny" );
+        }
+        g = gene_tree.copy();
+        if ( g.getNumberOfExternalNodes() <= 1 ) {
+            g.setRooted( true );
+            setMinimalDuplications( 0 );
+            setMinimalTreeHeight( 0.0 );
+            tree_array = new Phylogeny[ 1 ];
+            tree_array[ 0 ] = g;
+            return tree_array;
+        }
+        for( final PhylogenyNodeIterator iter = g.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( n.isRoot() ) {
+                if ( ( n.getNumberOfDescendants() != 2 ) && ( n.getNumberOfDescendants() != 3 ) ) {
+                    throw new IllegalArgumentException( "attempt to run SDI on gene tree with "
+                            + n.getNumberOfDescendants() + " child nodes at its root" );
+                }
+            }
+            else if ( !n.isExternal() && ( n.getNumberOfDescendants() != 2 ) ) {
+                throw new IllegalArgumentException( "attempt to run SDI on gene tree which is not completely binary [found node with "
+                        + n.getNumberOfDescendants() + " child nodes]" );
+            }
+        }
+        for( final PhylogenyNodeIterator iter = species_tree.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode n = iter.next();
+            if ( !n.isExternal() && ( n.getNumberOfDescendants() != 2 ) ) {
+                throw new IllegalArgumentException( "attempt to run SDI with a species tree which is not completely binary (after stripping) [found node with "
+                        + n.getNumberOfDescendants() + " child nodes]" );
+            }
+        }
+        g.reRoot( g.getFirstExternalNode() );
+        branches = SDIR.getBranchesInPreorder( g );
+        if ( minimize_mapping_cost || minimize_sum_of_dup ) {
+            sdise = new SDIse( g, species_tree );
+            duplications = sdise.getDuplicationsSum();
+        }
+        final Set<PhylogenyBranch> used_root_placements = new HashSet<PhylogenyBranch>();
+        F: for( j = 0; j < branches.size(); ++j ) {
+            prev_root = g.getRoot();
+            prev_root_c1 = prev_root.getChildNode1();
+            prev_root_c2 = prev_root.getChildNode2();
+            prev_root_was_dup = prev_root.isDuplication();
+            final PhylogenyBranch current_branch = branches.get( j );
+            g.reRoot( current_branch );
+            if ( minimize_mapping_cost || minimize_sum_of_dup ) {
+                duplications = sdise.updateM( prev_root_was_dup, prev_root_c1, prev_root_c2 );
+            }
+            if ( !used_root_placements.contains( current_branch ) ) {
+                if ( minimize_mapping_cost ) {
+                    cost = sdise.computeMappingCostL();
+                    if ( minimize_height && ( cost <= min_cost ) ) {
+                        height__diff = SDIR.moveRootOnBranchToMinHeight( g );
+                        height = height__diff[ 0 ];
+                        diff = height__diff[ 1 ];
+                    }
+                    if ( cost == min_cost ) {
+                        if ( minimize_height ) {
+                            smaller = equal = false;
+                            if ( height < min_height ) {
+                                min_height = height;
+                                counter = 1;
+                                smaller = true;
+                            }
+                            else if ( height == min_height ) {
+                                counter++;
+                                equal = true;
+                            }
+                            if ( Math.abs( diff ) < min_diff ) {
+                                min_diff = Math.abs( diff );
+                            }
+                        }
+                        if ( return_trees ) {
+                            if ( minimize_height ) {
+                                if ( smaller ) {
+                                    trees.clear();
+                                    trees.add( g.copy() );
+                                }
+                                else if ( equal && ( trees.size() < max_trees_to_return ) ) {
+                                    trees.add( g.copy() );
+                                }
+                            }
+                            else {
+                                counter++;
+                                if ( trees.size() < max_trees_to_return ) {
+                                    trees.add( g.copy() );
+                                }
+                            }
+                        }
+                        else if ( !minimize_height ) {
+                            counter++;
+                        }
+                    }
+                    else if ( cost < min_cost ) {
+                        if ( minimize_height ) {
+                            min_height = height;
+                            min_diff = Math.abs( diff );
+                        }
+                        if ( return_trees ) {
+                            trees.clear();
+                            trees.add( g.copy() );
+                        }
+                        counter = 1;
+                        min_cost = cost;
+                    }
+                    if ( duplications < min_duplications ) {
+                        min_duplications = duplications;
+                    }
+                }
+                else if ( minimize_sum_of_dup ) {
+                    if ( minimize_height && ( duplications <= min_duplications ) ) {
+                        height__diff = SDIR.moveRootOnBranchToMinHeight( g );
+                        height = height__diff[ 0 ];
+                        diff = height__diff[ 1 ];
+                    }
+                    if ( duplications == min_duplications ) {
+                        if ( minimize_height ) {
+                            smaller = equal = false;
+                            if ( height < min_height ) {
+                                min_height = height;
+                                counter = 1;
+                                smaller = true;
+                            }
+                            else if ( height == min_height ) {
+                                counter++;
+                                equal = true;
+                            }
+                            if ( Math.abs( diff ) < min_diff ) {
+                                min_diff = Math.abs( diff );
+                            }
+                        }
+                        if ( return_trees ) {
+                            if ( minimize_height ) {
+                                if ( smaller ) {
+                                    trees.clear();
+                                    trees.add( g.copy() );
+                                }
+                                else if ( equal && ( trees.size() < max_trees_to_return ) ) {
+                                    trees.add( g.copy() );
+                                }
+                            }
+                            else {
+                                counter++;
+                                if ( trees.size() < max_trees_to_return ) {
+                                    trees.add( g.copy() );
+                                }
+                            }
+                        }
+                        else if ( !minimize_height ) {
+                            counter++;
+                        }
+                    }
+                    else if ( duplications < min_duplications ) {
+                        if ( minimize_height ) {
+                            min_height = height;
+                            min_diff = Math.abs( diff );
+                        }
+                        if ( return_trees ) {
+                            trees.clear();
+                            trees.add( g.copy() );
+                        }
+                        counter = 1;
+                        min_duplications = duplications;
+                    }
+                }
+                else if ( minimize_height ) {
+                    height__diff = SDIR.moveRootOnBranchToMinHeight( g );
+                    height = height__diff[ 0 ];
+                    diff = height__diff[ 1 ];
+                    if ( Math.abs( diff ) < SDIR.ZERO_DIFF ) {
+                        sdise = new SDIse( g, species_tree );
+                        min_duplications = sdise.getDuplicationsSum();
+                        min_height = height;
+                        min_diff = Math.abs( diff );
+                        counter = 1;
+                        if ( return_trees ) {
+                            trees.add( g.copy() );
+                        }
+                        break F;
+                    }
+                }
+            } // if ( !used_root_placements.contains( current_branch ) )
+            used_root_placements.add( current_branch );
+        } // End of huge for loop "F".
+        if ( return_trees ) {
+            trees.trimToSize();
+            tree_array = new Phylogeny[ trees.size() ];
+            for( int i = 0; i < trees.size(); ++i ) {
+                tree_array[ i ] = trees.get( i );
+                tree_array[ i ].recalculateNumberOfExternalDescendants( false );
+            }
+        }
+        setCount( counter );
+        setMinimalDuplications( min_duplications );
+        setMinimalMappingCost( min_cost );
+        setMinimalTreeHeight( min_height );
+        setMinimalDiffInSubTreeHeights( Math.abs( min_diff ) );
+        return tree_array;
+    }
+
+    private void init() {
+        _count = -1;
+        _min_dup = Integer.MAX_VALUE;
+        _min_cost = Integer.MAX_VALUE;
+        _min_height = Double.MAX_VALUE;
+        _min_diff = Double.MAX_VALUE;
+        _time_sdi = -1;
+    }
+
+    private void setCount( final int i ) {
+        _count = i;
+    }
+
+    private void setMinimalDiffInSubTreeHeights( final double d ) {
+        _min_diff = d;
+    }
+
+    private void setMinimalDuplications( final int i ) {
+        _min_dup = i;
+    }
+
+    private void setMinimalMappingCost( final int i ) {
+        _min_cost = i;
+    }
+
+    private void setMinimalTreeHeight( final double d ) {
+        _min_height = d;
+    }
+
+    // This was totally changed on 2006/10/03.
+    // Places references to all Branches of Phylogeny t into a List.
+    // The order is preorder.
+    // Trees are treated as if they were unrooted (i.e. child 1 and
+    // child 2 of the root are treated as if they were connected
+    // directly).
+    // The resulting List allows visiting all branches without ever
+    // traversing more than one node at a time.
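+    //
+    // Usage sketch (illustrative only, not part of the algorithm; assumes a
+    // Phylogeny "p" whose root placement is to be varied exhaustively, as is
+    // done in infer() above):
+    //
+    //   p.reRoot( p.getFirstExternalNode() );
+    //   for( final PhylogenyBranch b : SDIR.getBranchesInPreorder( p ) ) {
+    //       p.reRoot( b );
+    //       // ... evaluate the current root placement ...
+    //   }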
+    public static List<PhylogenyBranch> getBranchesInPreorder( final Phylogeny t ) {
+        final ArrayList<PhylogenyBranch> branches = new ArrayList<PhylogenyBranch>();
+        if ( t.isEmpty() || ( t.getNumberOfExternalNodes() <= 1 ) ) {
+            return branches;
+        }
+        if ( t.getNumberOfExternalNodes() == 2 ) {
+            branches.add( new PhylogenyBranch( t.getRoot().getChildNode1(), t.getRoot().getChildNode2() ) );
+            return branches;
+        }
+        final Set<Integer> one = new HashSet<Integer>();
+        final Set<Integer> two = new HashSet<Integer>();
+        PhylogenyNode node = t.getRoot();
+        while ( !node.isRoot() || !two.contains( node.getId() ) ) {
+            if ( !node.isExternal() && !two.contains( node.getId() ) ) {
+                if ( !one.contains( node.getId() ) && !two.contains( node.getId() ) ) {
+                    one.add( node.getId() );
+                    node = node.getChildNode1();
+                }
+                else {
+                    two.add( node.getId() );
+                    node = node.getChildNode2();
+                }
+                if ( !node.getParent().isRoot() ) {
+                    branches.add( new PhylogenyBranch( node, node.getParent() ) );
+                }
+                else if ( !node.isExternal() ) {
+                    branches.add( new PhylogenyBranch( t.getRoot().getChildNode1(), t.getRoot().getChildNode2() ) );
+                }
+            }
+            else {
+                if ( !node.getParent().isRoot() && !node.isExternal() ) {
+                    branches.add( new PhylogenyBranch( node, node.getParent() ) );
+                }
+                node = node.getParent();
+            }
+        }
+        return branches;
+    }
+
+    // This places the root of t on its branch in such a way that it
+    // minimizes the tree height as much as possible.
+    // Returns the height of the resulting modified Phylogeny t and the
+    // difference in the heights of its two root subtrees.
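+    //
+    // Worked example (illustrative; subtree heights here are measured from the
+    // root, i.e. they include the child's branch length): suppose that, after
+    // equalizing, each root branch half is d = 0.4 and the two root subtree
+    // heights are 1.0 and 0.2, so diff = 0.8 and the tree height is 1.0.
+    // Since 2d is not greater than |diff|, the root is moved to the end of its
+    // branch (child0 gets 0.0, child1 gets 0.8) and the method returns
+    // { height - d, diff - 2d } = { 0.6, 0.0 }.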
+    private static double[] moveRootOnBranchToMinHeight( final Phylogeny t ) {
+        final PhylogenyNode root = t.getRoot();
+        if ( root.getNumberOfDescendants() != 2 ) {
+            throw new IllegalArgumentException( "attempt to move root to minimize height on root where number of child nodes does not equal two" );
+        }
+        final PhylogenyNode child0 = root.getChildNode( 0 );
+        final PhylogenyNode child1 = root.getChildNode( 1 );
+        final double newdist = 0.5 * ( ( child0.getDistanceToParent() > 0 ? child0.getDistanceToParent() : 0 ) + ( child1
+                .getDistanceToParent() > 0 ? child1.getDistanceToParent() : 0 ) );
+        child0.setDistanceToParent( newdist );
+        child1.setDistanceToParent( newdist );
+        final double d = child0.getDistanceToParent();
+        double diff = 0.0;
+        double height = 0.0;
+        final double[] height_diff = new double[ 2 ];
+        final double l0 = t.calculateSubtreeHeight( t.getRoot().getChildNode( 0 ) );
+        final double l1 = t.calculateSubtreeHeight( t.getRoot().getChildNode( 1 ) );
+        diff = l0 - l1;
+        height = t.getHeight();
+        if ( d > 0.0 ) {
+            if ( ( 2 * d ) > Math.abs( diff ) ) {
+                child0.setDistanceToParent( d - ( diff / 2.0 ) );
+                child1.setDistanceToParent( d + ( diff / 2.0 ) );
+                height_diff[ 0 ] = height - Math.abs( diff / 2 );
+                height_diff[ 1 ] = 0.0;
+            }
+            else {
+                if ( diff > 0 ) {
+                    child0.setDistanceToParent( 0.0 );
+                    child1.setDistanceToParent( 2 * d );
+                    height_diff[ 1 ] = diff - ( 2 * d );
+                }
+                else {
+                    child0.setDistanceToParent( 2 * d );
+                    child1.setDistanceToParent( 0.0 );
+                    height_diff[ 1 ] = diff + ( 2 * d );
+                }
+                height_diff[ 0 ] = height - d;
+            }
+        }
+        else {
+            height_diff[ 0 ] = height;
+            height_diff[ 1 ] = diff;
+        }
+        return height_diff;
+    }
+}
diff --git a/forester/java/src/org/forester/sdi/SDIse.java b/forester/java/src/org/forester/sdi/SDIse.java
new file mode 100644 (file)
index 0000000..4509685
--- /dev/null
@@ -0,0 +1,203 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Event;
+
+/*
+ * Implements our algorithm for speciation - duplication inference (SDI). <p>
+ * Reference: </p> <ul> <li>Zmasek, C.M. and Eddy, S.R. (2001) "A simple
+ * algorithm to infer gene duplication and speciation events on a gene tree".
+ * Bioinformatics, in press. </ul> <p> The initialization is accomplished by:
+ * </p> <ul> <li>method "linkNodesOfG()" of class SDI: setting the links for
+ * the external nodes of the gene tree <li>"preOrderReId()" from class
+ * Phylogeny: numbering of nodes of the species tree in preorder <li>the
+ * optional stripping of the species tree is accomplished by method
+ * "taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)" of class
+ * PhylogenyMethods </ul> <p> The recursion part is accomplished by this
+ * class' method "geneTreePostOrderTraversal(PhylogenyNode)". <p> Requires
+ * JDK 1.5 or greater.
+ * 
+ * @see SDI#linkNodesOfG()
+ * 
+ * @see Phylogeny#preOrderReId()
+ * 
+ * @see
+ * PhylogenyMethods#taxonomyBasedDeletionOfExternalNodes(Phylogeny,Phylogeny)
+ * 
+ * @see #geneTreePostOrderTraversal(PhylogenyNode)
+ * 
+ * @author Christian M. Zmasek
+ * 
+ * @version 1.102 -- last modified: 10/02/01
+ */
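+// Usage sketch (illustrative only; assumes "gene_tree" and "species_tree" are
+// rooted, completely binary Phylogeny objects with species assigned to all
+// external nodes):
+//
+//   final SDIse sdi = new SDIse( gene_tree, species_tree );
+//   final int dups  = sdi.getDuplicationsSum();
+//   final int cost  = sdi.computeMappingCostL();
+//
+// The inference itself runs in the constructor; both methods called on "sdi"
+// are defined in the SDI superclass.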
+public class SDIse extends SDI {
+
+    /**
+     * Constructor which sets the gene tree and the species tree to be compared.
+     * species_tree is the species tree to which the gene tree gene_tree will be
+     * compared with method "infer(boolean)". Both trees must be completely
+     * binary and rooted. The actual inference is accomplished with method
+     * "infer(boolean)". The mapping cost L can then be calculated with method
+     * "computeMappingCost()".
+     * <p>
+     * (Last modified: 01/11/01)
+     * 
+     * @see #infer(boolean)
+     * @see SDI#computeMappingCostL()
+     * @param gene_tree
+     *            reference to a rooted binary gene Phylogeny to which assign
+     *            duplication vs speciation, must have species names in the
+     *            species name fields for all external nodes
+     * @param species_tree
+     *            reference to a rooted binary species Phylogeny which might get
+     *            stripped in the process, must have species names in the
+     *            species name fields for all external nodes
+     */
+    public SDIse( final Phylogeny gene_tree, final Phylogeny species_tree ) {
+        super( gene_tree, species_tree );
+        _duplications_sum = 0;
+        getSpeciesTree().preOrderReId();
+        linkNodesOfG();
+        geneTreePostOrderTraversal( getGeneTree().getRoot() );
+    }
+
+    // Helper method for updateM( boolean, PhylogenyNode, PhylogenyNode )
+    // Calculates M for PhylogenyNode n, given that M for the two children
+    // of n has been calculated.
+    // (Last modified: 10/02/01)
+    private void calculateMforNode( final PhylogenyNode n ) {
+        if ( !n.isExternal() ) {
+            final boolean was_duplication = n.isDuplication();
+            PhylogenyNode a = n.getChildNode1().getLink(), b = n.getChildNode2().getLink();
+            while ( a != b ) {
+                if ( a.getId() > b.getId() ) {
+                    a = a.getParent();
+                }
+                else {
+                    b = b.getParent();
+                }
+            }
+            n.setLink( a );
+            Event event = null;
+            if ( ( a == n.getChildNode1().getLink() ) || ( a == n.getChildNode2().getLink() ) ) {
+                event = Event.createSingleDuplicationEvent();
+                if ( !was_duplication ) {
+                    ++_duplications_sum;
+                }
+            }
+            else {
+                event = Event.createSingleSpeciationEvent();
+                if ( was_duplication ) {
+                    --_duplications_sum;
+                }
+            }
+            n.getNodeData().setEvent( event );
+        }
+    } // calculateMforNode( PhylogenyNode )
+
+    /**
+     * Traverses the subtree of PhylogenyNode g in postorder, calculating the
+     * mapping function M, and determines which nodes represent speciation
+     * events and which ones duplication events.
+     * <p>
+     * Preconditions: Mapping M for external nodes must have been calculated and
+     * the species tree must be labelled in preorder.
+     * <p>
+     * (Last modified: 01/11/01)
+     * 
+     * @param g
+     *            starting node of a gene tree - normally the root
+     */
+    void geneTreePostOrderTraversal( final PhylogenyNode g ) {
+        PhylogenyNode a, b;
+        if ( !g.isExternal() ) {
+            geneTreePostOrderTraversal( g.getChildNode( 0 ) );
+            geneTreePostOrderTraversal( g.getChildNode( 1 ) );
+            a = g.getChildNode( 0 ).getLink();
+            b = g.getChildNode( 1 ).getLink();
+            while ( a != b ) {
+                if ( a.getId() > b.getId() ) {
+                    a = a.getParent();
+                }
+                else {
+                    b = b.getParent();
+                }
+            }
+            g.setLink( a );
+            // Determines whether dup. or spec.
+            Event event = null;
+            if ( ( a == g.getChildNode( 0 ).getLink() ) || ( a == g.getChildNode( 1 ).getLink() ) ) {
+                event = Event.createSingleDuplicationEvent();
+                ++_duplications_sum;
+            }
+            else {
+                event = Event.createSingleSpeciationEvent();
+            }
+            g.getNodeData().setEvent( event );
+        }
+    } // geneTreePostOrderTraversal( PhylogenyNode )
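+    // Worked example (illustrative): for a species tree ((A,B),C) and a gene
+    // tree node whose two children link to the species tree nodes A and B, the
+    // while loop above walks the node with the larger preorder id upwards until
+    // both meet at the ancestor (A,B); since that ancestor differs from both
+    // child links, the gene tree node is a speciation. If both children linked
+    // to A instead, the meeting node would be A itself, i.e. equal to a child
+    // link, and the node would be a duplication.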
+
+    /**
+     * Updates the mapping function M after the root of the gene tree has been
+     * moved by one branch. It calculates M for the root of the gene tree and
+     * one of its two children.
+     * <p>
+     * To be used ONLY by method "SDIunrooted.fastInfer(Phylogeny,Phylogeny)".
+     * <p>
+     * (Last modified: 10/02/01)
+     * 
+     * @param prev_root_was_dup
+     *            true if the previous root was a duplication, false otherwise
+     * @param prev_root_c1
+     *            child 1 of the previous root
+     * @param prev_root_c2
+     *            child 2 of the previous root
+     * @return number of duplications which have been assigned in gene tree
+     */
+    int updateM( final boolean prev_root_was_dup, final PhylogenyNode prev_root_c1, final PhylogenyNode prev_root_c2 ) {
+        final PhylogenyNode root = getGeneTree().getRoot();
+        if ( ( root.getChildNode1() == prev_root_c1 ) || ( root.getChildNode2() == prev_root_c1 ) ) {
+            calculateMforNode( prev_root_c1 );
+        }
+        else {
+            calculateMforNode( prev_root_c2 );
+        }
+        Event event = null;
+        if ( prev_root_was_dup ) {
+            event = Event.createSingleDuplicationEvent();
+        }
+        else {
+            event = Event.createSingleSpeciationEvent();
+        }
+        root.getNodeData().setEvent( event );
+        calculateMforNode( root );
+        return getDuplicationsSum();
+    } // updateM( boolean, PhylogenyNode, PhylogenyNode )
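+    // Driver sketch (illustrative; mirrors how SDIR.infer() uses updateM()
+    // while re-rooting a gene tree "g" along successive branches; "sdise" is
+    // an SDIse built for g and the species tree, "next_branch" the next root
+    // placement to try):
+    //
+    //   final PhylogenyNode prev_root    = g.getRoot();
+    //   final PhylogenyNode prev_root_c1 = prev_root.getChildNode1();
+    //   final PhylogenyNode prev_root_c2 = prev_root.getChildNode2();
+    //   final boolean       was_dup      = prev_root.isDuplication();
+    //   g.reRoot( next_branch );
+    //   final int dups = sdise.updateM( was_dup, prev_root_c1, prev_root_c2 );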
+} // End of class SDIse.
diff --git a/forester/java/src/org/forester/sdi/Shin.java b/forester/java/src/org/forester/sdi/Shin.java
new file mode 100644 (file)
index 0000000..436e8d5
--- /dev/null
@@ -0,0 +1,134 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+public class Shin {
+
+    public Shin() {
+    }
+
+    private void analyze( final Phylogeny gene_tree,
+                          final String gene_tree_file_name,
+                          final Phylogeny[] species_trees,
+                          final File out_dir ) throws IOException {
+        final boolean minimize_cost = true;
+        final boolean minimize_sum_of_dup = true;
+        final boolean minimize_height = true;
+        final int trees_to_return = 1;
+        System.out.println( gene_tree_file_name + ": " + gene_tree.getName() );
+        final Set<Taxonomy> species_tree_species = getAllExternalSpecies( species_trees[ 0 ] );
+        final PhylogenyWriter w = new PhylogenyWriter();
+        for( final Phylogeny species_tree : species_trees ) {
+            PhylogenyMethods.deleteExternalNodesPositiveSelection( species_tree_species, gene_tree );
+            if ( gene_tree.isEmpty() ) {
+                System.out.println( " >> empty: " + gene_tree_file_name + ": " + gene_tree.getName() );
+                continue;
+            }
+            final File outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + gene_tree_file_name );
+            if ( outfile.exists() ) {
+                System.out
+                        .println( " >> already exists, skipping: " + gene_tree_file_name + ": " + gene_tree.getName() );
+                // Skip this gene tree so an existing result file is not overwritten.
+                continue;
+            }
+            final SDIR sdir = new SDIR();
+            final Phylogeny[] analyzed_gene_trees = sdir.infer( gene_tree,
+                                                                species_tree,
+                                                                minimize_cost,
+                                                                minimize_sum_of_dup,
+                                                                minimize_height,
+                                                                true,
+                                                                trees_to_return );
+            final int duplications = sdir.getMinimalDuplications();
+            final int mapping_cost = sdir.getMinimalMappingCost();
+            final List<Phylogeny> phys = new ArrayList<Phylogeny>();
+            for( final Phylogeny phy : analyzed_gene_trees ) {
+                phys.add( phy );
+            }
+            w.toPhyloXML( outfile, phys, 0, ForesterUtil.LINE_SEPARATOR );
+        }
+    }
+
+    private void checkSpeciesTreesForEqualNumberOfExtNodes( final Phylogeny[] species_trees ) {
+        int ext_nodes = -1;
+        for( final Phylogeny phylogeny : species_trees ) {
+            if ( ext_nodes < 0 ) {
+                ext_nodes = phylogeny.getNumberOfExternalNodes();
+            }
+            else if ( ext_nodes != phylogeny.getNumberOfExternalNodes() ) {
+                throw new IllegalArgumentException( "species trees must have all the same number of external nodes" );
+            }
+        }
+    }
+
+    public void method1( final List<File> gene_tree_files, final Phylogeny[] species_trees, final File out_dir )
+            throws IOException {
+        checkSpeciesTreesForEqualNumberOfExtNodes( species_trees );
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        for( final File gene_tree_file : gene_tree_files ) {
+            if ( ForesterUtil.isReadableFile( gene_tree_file ) != null ) {
+                throw new IOException( "[" + gene_tree_file + "] is not readable" );
+            }
+            final Phylogeny[] gene_trees = factory.create( gene_tree_file, new PhyloXmlParser() );
+            if ( gene_trees.length != 1 ) {
+                throw new IOException( "[" + gene_tree_file + "] contains " + gene_trees.length
+                        + " gene trees, expecting precisely one" );
+            }
+            analyze( gene_trees[ 0 ], gene_tree_file.getName(), species_trees, out_dir );
+        }
+    }
+
+    private static Set<Taxonomy> getAllExternalSpecies( final Phylogeny phy ) {
+        final Set<Taxonomy> specs = new HashSet<Taxonomy>();
+        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+            final PhylogenyNode n = it.next();
+            if ( n.getNodeData().isHasTaxonomy() ) {
+                specs.add( n.getNodeData().getTaxonomy() );
+            }
+            else {
+                throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
+            }
+        }
+        return specs;
+    }
+}
diff --git a/forester/java/src/org/forester/sdi/TaxonomyAssigner.java b/forester/java/src/org/forester/sdi/TaxonomyAssigner.java
new file mode 100644 (file)
index 0000000..3e3fe86
--- /dev/null
@@ -0,0 +1,71 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+public class TaxonomyAssigner extends SDI {
+
+    public TaxonomyAssigner( final Phylogeny gene_tree, final Phylogeny species_tree ) {
+        super( gene_tree, species_tree );
+        getSpeciesTree().preOrderReId();
+        linkNodesOfG();
+        geneTreePostOrderTraversal( getGeneTree().getRoot() );
+    }
+
+    void geneTreePostOrderTraversal( final PhylogenyNode g ) {
+        if ( !g.isExternal() ) {
+            for( final PhylogenyNodeIterator iter = g.iterateChildNodesForward(); iter.hasNext(); ) {
+                geneTreePostOrderTraversal( iter.next() );
+            }
+            final PhylogenyNode[] linked_nodes = new PhylogenyNode[ g.getNumberOfDescendants() ];
+            for( int i = 0; i < linked_nodes.length; ++i ) {
+                linked_nodes[ i ] = g.getChildNode( i ).getLink();
+            }
+            final int[] min_max = GSDI.obtainMinMaxIdIndices( linked_nodes );
+            int min_i = min_max[ 0 ];
+            int max_i = min_max[ 1 ];
+            while ( linked_nodes[ min_i ] != linked_nodes[ max_i ] ) {
+                linked_nodes[ max_i ] = linked_nodes[ max_i ].getParent();
+                final int[] min_max_ = GSDI.obtainMinMaxIdIndices( linked_nodes );
+                min_i = min_max_[ 0 ];
+                max_i = min_max_[ 1 ];
+            }
+            final PhylogenyNode s = linked_nodes[ max_i ];
+            g.setLink( s );
+            if ( s.getNodeData().isHasTaxonomy() ) {
+                g.getNodeData().setTaxonomy( ( Taxonomy ) s.getNodeData().getTaxonomy().copy() );
+            }
+        }
+    }
+
+    public static void execute( final Phylogeny gene_tree, final Phylogeny species_tree ) {
+        new TaxonomyAssigner( gene_tree, species_tree );
+    }
+}
diff --git a/forester/java/src/org/forester/sdi/TestGSDI.java b/forester/java/src/org/forester/sdi/TestGSDI.java
new file mode 100644 (file)
index 0000000..6cf3c72
--- /dev/null
@@ -0,0 +1,1215 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+import java.io.IOException;
+
+import org.forester.development.DevelopmentTools;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+
+public final class TestGSDI {
+
+    private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
+        final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
+        p.setRooted( true );
+        return p;
+    }
+
+    private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
+        return PhylogenyMethods.getInstance().obtainLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+    }
+
+    public static boolean test() {
+        if ( !TestGSDI.testGSDI_general() ) {
+            return false;
+        }
+        if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) {
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testGSDI_against_binary_gene_tree() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final String multi_species_2_str = "(((((([&&NHX:S=1],[&&NHX:S=2]),"
+                    + "([&&NHX:S=3],[&&NHX:S=4],[&&NHX:S=5])),"
+                    + "([&&NHX:S=6],[&&NHX:S=7],[&&NHX:S=8],[&&NHX:S=9])),"
+                    + "([&&NHX:S=10],[&&NHX:S=11])),"
+                    + "([&&NHX:S=12],[&&NHX:S=13],[&&NHX:S=14])),"
+                    + "([&&NHX:S=15],([&&NHX:S=16],[&&NHX:S=17]),([&&NHX:S=18],[&&NHX:S=19],[&&NHX:S=20]),([&&NHX:S=21],[&&NHX:S=22],[&&NHX:S=23],[&&NHX:S=24])));";
+            final String gene_2_1_str = "(((((([&&NHX:S=1],[&&NHX:S=2])1_2,([&&NHX:S=3],[&&NHX:S=4])),"
+                    + "([&&NHX:S=6],[&&NHX:S=7])6_7_8_9)1_9,([&&NHX:S=10],[&&NHX:S=11])),"
+                    + "([&&NHX:S=12],[&&NHX:S=13])12_13_14)1_14,"
+                    + "([&&NHX:S=15],([&&NHX:S=21],[&&NHX:S=24])21_22_23_24)15_24);";
+            final Phylogeny multi_species_2 = factory.create( multi_species_2_str, new NHXParser() )[ 0 ];
+            final Phylogeny gene_2_1 = factory.create( gene_2_1_str, new NHXParser() )[ 0 ];
+            multi_species_2.setRooted( true );
+            gene_2_1.setRooted( true );
+            final GSDI sdi = new GSDI( gene_2_1, multi_species_2, false );
+            if ( sdi.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testGSDI_general() {
+        try {
+            final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+            final String s1_ = "((([&&NHX:S=A2],[&&NHX:S=A1]),[&&NHX:S=B],[&&NHX:S=C]),[&&NHX:S=D])";
+            final Phylogeny s1 = ParserBasedPhylogenyFactory.getInstance().create( s1_, new NHXParser() )[ 0 ];
+            s1.setRooted( true );
+            final Phylogeny g1 = TestGSDI
+                    .createPhylogeny( "((((B[&&NHX:S=B],A1[&&NHX:S=A1]),C[&&NHX:S=C]),A2[&&NHX:S=A2]),D[&&NHX:S=D])" );
+            final GSDI sdi1 = new GSDI( g1, s1, false );
+            if ( sdi1.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g1.getNode( "B" ), g1.getNode( "A1" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g1.getNode( "C" ), g1.getNode( "A1" ) ).getNodeData().getEvent()
+                    .isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !( pm.obtainLCA( g1.getNode( "A2" ), g1.getNode( "A1" ) ).getNodeData().getEvent().isDuplication() ) ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g1.getNode( "D" ), g1.getNode( "A1" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2 = TestGSDI
+                    .createPhylogeny( "((((A2[&&NHX:S=A2],A1[&&NHX:S=A1]),B[&&NHX:S=B]),C[&&NHX:S=C]),D[&&NHX:S=D])" );
+            final GSDI sdi2 = new GSDI( g2, s1, false );
+            if ( sdi2.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2.getNode( "A1" ), g2.getNode( "A2" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2.getNode( "A1" ), g2.getNode( "B" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2.getNode( "A1" ), g2.getNode( "C" ) ).getNodeData().getEvent()
+                    .isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2.getNode( "A1" ), g2.getNode( "D" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g3 = TestGSDI
+                    .createPhylogeny( "((((A2[&&NHX:S=A2],A1[&&NHX:S=A1]),C[&&NHX:S=C]),B[&&NHX:S=B]),D[&&NHX:S=D])" );
+            final GSDI sdi3 = new GSDI( g3, s1, false );
+            if ( sdi3.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g3.getNode( "A1" ), g3.getNode( "A2" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g3.getNode( "A1" ), g3.getNode( "C" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g3.getNode( "A1" ), g3.getNode( "B" ) ).getNodeData().getEvent()
+                    .isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g3.getNode( "A1" ), g3.getNode( "D" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g4 = TestGSDI
+                    .createPhylogeny( "(((B[&&NHX:S=B],C1[&&NHX:S=C]),C2[&&NHX:S=C]),D[&&NHX:S=D])" );
+            final GSDI sdi4 = new GSDI( g4, s1, false );
+            if ( sdi4.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g4.getNode( "B" ), g4.getNode( "C1" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g4.getNode( "B" ), g4.getNode( "C2" ) ).getNodeData().getEvent().isDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g4.getNode( "B" ), g4.getNode( "D" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g5 = TestGSDI
+                    .createPhylogeny( "(((D1[&&NHX:S=D],A1[&&NHX:S=A1]),B[&&NHX:S=B]),((D2[&&NHX:S=D],D3[&&NHX:S=D]),C[&&NHX:S=C]))" );
+            final GSDI sdi5 = new GSDI( g5, s1, false );
+            if ( sdi5.getDuplicationsSum() != 3 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g5.getNode( "D1" ), g5.getNode( "A1" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g5.getNode( "D1" ), g5.getNode( "B" ) ).getNodeData().getEvent().isDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g5.getNode( "D1" ), g5.getNode( "D2" ) ).getNodeData().getEvent().isDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g5.getNode( "D2" ), g5.getNode( "D3" ) ).getNodeData().getEvent().isDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g5.getNode( "C" ), g5.getNode( "D3" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny species7 = TestGSDI.createPhylogeny( "(((((((([&&NHX:S=a1],[&&NHX:S=a2]),"
+                    + "([&&NHX:S=b1],[&&NHX:S=b2])),[&&NHX:S=x]),(([&&NHX:S=m1],[&&NHX:S=m2]),"
+                    + "([&&NHX:S=n1],[&&NHX:S=n2]))),(([&&NHX:S=i1],[&&NHX:S=i2]),"
+                    + "([&&NHX:S=j1],[&&NHX:S=j2]))),(([&&NHX:S=e1],[&&NHX:S=e2]),"
+                    + "([&&NHX:S=f1],[&&NHX:S=f2]))),[&&NHX:S=y]),[&&NHX:S=z])" );
+            final Phylogeny gene7_2 = TestGSDI
+                    .createPhylogeny( "(((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),j2[&&NHX:S=j2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
+            gene7_2.setRooted( true );
+            final GSDI sdi7_2 = new GSDI( gene7_2, species7, false );
+            if ( sdi7_2.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "a2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "x" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "m1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "i1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "j2" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "e1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "y" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( gene7_2, "a1", "z" ).isSpeciation() ) {
+                return false;
+            }
+            final String s2_ = "((" + "([&&NHX:S=a1],[&&NHX:S=a2],[&&NHX:S=a3],[&&NHX:S=a4]),"
+                    + "([&&NHX:S=b1],[&&NHX:S=b2],[&&NHX:S=b3],[&&NHX:S=b4]),"
+                    + "([&&NHX:S=c1],[&&NHX:S=c2],[&&NHX:S=c3],[&&NHX:S=c4]),"
+                    + "([&&NHX:S=d1],[&&NHX:S=d2],[&&NHX:S=d3],[&&NHX:S=d4])),("
+                    + "([&&NHX:S=e1],[&&NHX:S=e2],[&&NHX:S=e3],[&&NHX:S=e4]),"
+                    + "([&&NHX:S=f1],[&&NHX:S=f2],[&&NHX:S=f3],[&&NHX:S=f4]),"
+                    + "([&&NHX:S=g1],[&&NHX:S=g2],[&&NHX:S=g3],[&&NHX:S=g4]),"
+                    + "([&&NHX:S=h1],[&&NHX:S=h2],[&&NHX:S=h3],[&&NHX:S=h4])),("
+                    + "([&&NHX:S=i1],[&&NHX:S=i2],[&&NHX:S=i3],[&&NHX:S=i4]),"
+                    + "([&&NHX:S=j1],[&&NHX:S=j2],[&&NHX:S=j3],[&&NHX:S=j4]),"
+                    + "([&&NHX:S=k1],[&&NHX:S=k2],[&&NHX:S=k3],[&&NHX:S=k4]),"
+                    + "([&&NHX:S=l1],[&&NHX:S=l2],[&&NHX:S=l3],[&&NHX:S=l4])),("
+                    + "([&&NHX:S=m1],[&&NHX:S=m2],[&&NHX:S=m3],[&&NHX:S=m4]),"
+                    + "([&&NHX:S=n1],[&&NHX:S=n2],[&&NHX:S=n3],[&&NHX:S=n4]),"
+                    + "([&&NHX:S=o1],[&&NHX:S=o2],[&&NHX:S=o3],[&&NHX:S=o4]),"
+                    + "([&&NHX:S=p1],[&&NHX:S=p2],[&&NHX:S=p3],[&&NHX:S=p4])"
+                    + "),[&&NHX:S=x],[&&NHX:S=y],[&&NHX:S=z])";
+            final Phylogeny s2 = ParserBasedPhylogenyFactory.getInstance().create( s2_, new NHXParser() )[ 0 ];
+            s2.setRooted( true );
+            final Phylogeny g2_0 = TestGSDI.createPhylogeny( "(m1[&&NHX:S=m1],m3[&&NHX:S=m3])" );
+            final GSDI sdi2_0 = new GSDI( g2_0, s2, false );
+            if ( sdi2_0.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_0.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_0.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_0.getNode( "m1" ), g2_0.getNode( "m3" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_1 = TestGSDI.createPhylogeny( "(e2[&&NHX:S=e2],h2[&&NHX:S=h2])" );
+            final GSDI sdi2_1 = new GSDI( g2_1, s2, false );
+            if ( sdi2_1.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_1.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_1.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_1.getNode( "e2" ), g2_1.getNode( "h2" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_2 = TestGSDI.createPhylogeny( "(e2[&&NHX:S=e2],p4[&&NHX:S=p4])" );
+            final GSDI sdi2_2 = new GSDI( g2_2, s2, false );
+            if ( sdi2_2.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_2.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_2.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_2.getNode( "e2" ), g2_2.getNode( "p4" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_3 = TestGSDI.createPhylogeny( "(e2a[&&NHX:S=e2],e2b[&&NHX:S=e2])" );
+            final GSDI sdi2_3 = new GSDI( g2_3, s2, false );
+            if ( sdi2_3.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_3.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_3.getSpeciationsSum() != 0 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_3.getNode( "e2a" ), g2_3.getNode( "e2b" ) ).getNodeData().getEvent().isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_4 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],j4[&&NHX:S=j4]),i3[&&NHX:S=i3])" );
+            final GSDI sdi2_4 = new GSDI( g2_4, s2, false );
+            if ( sdi2_4.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_4.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_4.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_4.getNode( "j1" ), g2_4.getNode( "j4" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_4.getNode( "j1" ), g2_4.getNode( "i3" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_5 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],j4[&&NHX:S=j4]),f3[&&NHX:S=f3])" );
+            final GSDI sdi2_5 = new GSDI( g2_5, s2, false );
+            if ( sdi2_5.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_5.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_5.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_5.getNode( "j1" ), g2_5.getNode( "j4" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_5.getNode( "j1" ), g2_5.getNode( "f3" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_6 = TestGSDI.createPhylogeny( "((j3[&&NHX:S=j3],i4[&&NHX:S=i4]),f3[&&NHX:S=f3])" );
+            final GSDI sdi2_6 = new GSDI( g2_6, s2, false );
+            if ( sdi2_6.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_6.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_6.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_6.getNode( "j3" ), g2_6.getNode( "i4" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_6.getNode( "j3" ), g2_6.getNode( "f3" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_7 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],k1[&&NHX:S=k1]),i1[&&NHX:S=i1])" );
+            final GSDI sdi2_7 = new GSDI( g2_7, s2, false );
+            if ( sdi2_7.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_7.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_7.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_7.getNode( "j1" ), g2_7.getNode( "k1" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_7.getNode( "j1" ), g2_7.getNode( "i1" ) ).getNodeData().getEvent()
+                    .isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_8 = TestGSDI.createPhylogeny( "(j1[&&NHX:S=j1],(k1[&&NHX:S=k1],i1[&&NHX:S=i1]))" );
+            final GSDI sdi2_8 = new GSDI( g2_8, s2, false );
+            if ( sdi2_8.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_8.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_8.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_8.getNode( "j1" ), g2_8.getNode( "k1" ) ).getNodeData().getEvent()
+                    .isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !pm.obtainLCA( g2_8.getNode( "k1" ), g2_8.getNode( "i1" ) ).getNodeData().getEvent().isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_9 = TestGSDI.createPhylogeny( "((j1[&&NHX:S=j1],k4[&&NHX:S=k4]),f2[&&NHX:S=f2])" );
+            final GSDI sdi2_9 = new GSDI( g2_9, s2, false );
+            if ( sdi2_9.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_9.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_9.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_9, "j1", "k4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_9, "j1", "f2" ).isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_10 = TestGSDI.createPhylogeny( "((m1[&&NHX:S=m1],k4[&&NHX:S=k4]),f2[&&NHX:S=f2])" );
+            final GSDI sdi2_10 = new GSDI( g2_10, s2, false );
+            if ( sdi2_10.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_10.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_10.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_10, "m1", "k4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_10, "m1", "f2" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_11 = TestGSDI.createPhylogeny( "((m1[&&NHX:S=m1],k4[&&NHX:S=k4]),x[&&NHX:S=x])" );
+            final GSDI sdi2_11 = new GSDI( g2_11, s2, false );
+            if ( sdi2_11.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_11.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_11.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_11, "m1", "k4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_11, "m1", "x" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_12 = TestGSDI.createPhylogeny( "(m1[&&NHX:S=m1],(k4[&&NHX:S=k4],x[&&NHX:S=x]))" );
+            final GSDI sdi2_12 = new GSDI( g2_12, s2, false );
+            if ( sdi2_12.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_12.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_12.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_12, "x", "k4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_12, "m1", "x" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_13 = TestGSDI.createPhylogeny( "(x[&&NHX:S=x],(y[&&NHX:S=y],z[&&NHX:S=z]))" );
+            final GSDI sdi2_13 = new GSDI( g2_13, s2, false );
+            if ( sdi2_13.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_13.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_13.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_13, "y", "z" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_13, "x", "z" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_14 = TestGSDI.createPhylogeny( "(a1_1[&&NHX:S=a1],(b1[&&NHX:S=b1],a1[&&NHX:S=a1]))" );
+            final GSDI sdi2_14 = new GSDI( g2_14, s2, false );
+            if ( sdi2_14.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_14.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_14.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_14, "b1", "a1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_14, "b1", "a1_1" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_15 = TestGSDI.createPhylogeny( "(a2[&&NHX:S=a2],(b1[&&NHX:S=b1],a1[&&NHX:S=a1]))" );
+            final GSDI sdi2_15 = new GSDI( g2_15, s2, false );
+            if ( sdi2_15.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_15.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_15.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_15, "b1", "a1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_15, "b1", "a2" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_16 = TestGSDI.createPhylogeny( "(n2[&&NHX:S=n2],(j3[&&NHX:S=j3],n1[&&NHX:S=n1]))" );
+            final GSDI sdi2_16 = new GSDI( g2_16, s2, false );
+            if ( sdi2_16.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_16.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_16.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_16, "j3", "n1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_16, "j3", "n2" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_17 = TestGSDI.createPhylogeny( "(p4[&&NHX:S=p4],(j3[&&NHX:S=j3],n1[&&NHX:S=n1]))" );
+            final GSDI sdi2_17 = new GSDI( g2_17, s2, false );
+            if ( sdi2_17.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_17.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_17.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_17, "j3", "n1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_17, "j3", "p4" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_18 = TestGSDI
+                    .createPhylogeny( "((n11[&&NHX:S=n1],n12[&&NHX:S=n1]),(n13[&&NHX:S=n1],n14[&&NHX:S=n1]))" );
+            final GSDI sdi2_18 = new GSDI( g2_18, s2, false );
+            if ( sdi2_18.getDuplicationsSum() != 3 ) {
+                return false;
+            }
+            if ( sdi2_18.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_18.getSpeciationsSum() != 0 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_18, "n11", "n12" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_18, "n13", "n14" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_18, "n11", "n13" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_19 = TestGSDI
+                    .createPhylogeny( "((n11[&&NHX:S=n1],n21[&&NHX:S=n2]),(n12[&&NHX:S=n1],n22[&&NHX:S=n2]))" );
+            final GSDI sdi2_19 = new GSDI( g2_19, s2, false );
+            if ( sdi2_19.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_19.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_19.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_19, "n11", "n21" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_19, "n12", "n22" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_19, "n11", "n12" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_20 = TestGSDI
+                    .createPhylogeny( "((n11[&&NHX:S=n1],n2[&&NHX:S=n2]),(n12[&&NHX:S=n1],n3[&&NHX:S=n3]))" );
+            final GSDI sdi2_20 = new GSDI( g2_20, s2, false );
+            if ( sdi2_20.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_20.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_20.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_20, "n11", "n2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_20, "n12", "n3" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_20, "n11", "n12" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_21 = TestGSDI
+                    .createPhylogeny( "((n1[&&NHX:S=n1],n2[&&NHX:S=n2]),(n3[&&NHX:S=n3],a1[&&NHX:S=a1]))" );
+            final GSDI sdi2_21 = new GSDI( g2_21, s2, false );
+            if ( sdi2_21.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_21.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_21.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_21, "n1", "n2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_21, "n3", "a1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_21, "n2", "a1" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_22 = TestGSDI
+                    .createPhylogeny( "((n1[&&NHX:S=n1],n2[&&NHX:S=n2]),(n3[&&NHX:S=n3],n4[&&NHX:S=n4]))" );
+            final GSDI sdi2_22 = new GSDI( g2_22, s2, false );
+            if ( sdi2_22.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_22.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_22.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_22, "n1", "n2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_22, "n3", "n4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_22, "n1", "n3" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_23 = TestGSDI
+                    .createPhylogeny( "((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),(c1[&&NHX:S=c1],d1[&&NHX:S=d1]))" );
+            final GSDI sdi2_23 = new GSDI( g2_23, s2, false );
+            if ( sdi2_23.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_23.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_23.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_23, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_23, "c1", "d1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_23, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_24 = TestGSDI
+                    .createPhylogeny( "((a1[&&NHX:S=a1],e1[&&NHX:S=e1]),(i1[&&NHX:S=i1],m1[&&NHX:S=m1]))" );
+            final GSDI sdi2_24 = new GSDI( g2_24, s2, false );
+            if ( sdi2_24.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_24.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_24.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_24, "a1", "e1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_24, "i1", "m1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_24, "a1", "i1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_25 = TestGSDI
+                    .createPhylogeny( "((a1[&&NHX:S=a1],a4[&&NHX:S=a4]),(b1[&&NHX:S=b1],c1[&&NHX:S=c1]))" );
+            final GSDI sdi2_25 = new GSDI( g2_25, s2, false );
+            if ( sdi2_25.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_25.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_25.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_25, "a1", "a4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_25, "b1", "c1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_25, "a1", "b1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_26 = TestGSDI
+                    .createPhylogeny( "(((a1[&&NHX:S=a1],a4[&&NHX:S=a4]),b1[&&NHX:S=b1]),e1[&&NHX:S=e1])" );
+            final GSDI sdi2_26 = new GSDI( g2_26, s2, false );
+            if ( sdi2_26.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_26.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_26.getSpeciationsSum() != 3 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_26, "a1", "a4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_26, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_26, "a1", "e1" ).isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_27 = TestGSDI
+                    .createPhylogeny( "(((a1[&&NHX:S=a1],a4[&&NHX:S=a4]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1])" );
+            final GSDI sdi2_27 = new GSDI( g2_27, s2, false );
+            if ( sdi2_27.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_27.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_27.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_27, "a1", "a4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_27, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_27, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_28 = TestGSDI
+                    .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),e1[&&NHX:S=e1])" );
+            final GSDI sdi2_28 = new GSDI( g2_28, s2, false );
+            if ( sdi2_28.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_28.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_28.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_28, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_28, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_28, "a1", "e1" ).isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_29 = TestGSDI
+                    .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1])" );
+            final GSDI sdi2_29 = new GSDI( g2_29, s2, false );
+            if ( sdi2_29.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_29.getSpeciationOrDuplicationEventsSum() != 2 ) {
+                return false;
+            }
+            if ( sdi2_29.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_29, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_29, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_29, "a1", "d1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_30 = TestGSDI
+                    .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),a2[&&NHX:S=a2])" );
+            final GSDI sdi2_30 = new GSDI( g2_30, s2, false );
+            if ( sdi2_30.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_30.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_30.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_30, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_30, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_30, "a1", "a2" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_31 = TestGSDI
+                    .createPhylogeny( "(((a1[&&NHX:S=a1],b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),c2[&&NHX:S=c2])" );
+            final GSDI sdi2_31 = new GSDI( g2_31, s2, false );
+            if ( sdi2_31.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_31.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_31.getSpeciationsSum() != 1 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_31, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_31, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_31, "a1", "c2" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_32 = TestGSDI
+                    .createPhylogeny( "((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1]),x[&&NHX:S=x]),p1[&&NHX:S=p1]),i1[&&NHX:S=i1]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
+            final GSDI sdi2_32 = new GSDI( g2_32, s2, false );
+            if ( sdi2_32.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_32.getSpeciationOrDuplicationEventsSum() != 7 ) {
+                return false;
+            }
+            if ( sdi2_32.getSpeciationsSum() != 3 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "a2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "d1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "x" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "p1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "i1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "e1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "y" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_32, "a1", "z" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_33 = TestGSDI
+                    .createPhylogeny( "(((((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),c1[&&NHX:S=c1]),d1[&&NHX:S=d1]),x[&&NHX:S=x]),p1[&&NHX:S=p1]),i1[&&NHX:S=i1]),k2[&&NHX:S=k2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
+            final GSDI sdi2_33 = new GSDI( g2_33, s2, false );
+            if ( sdi2_33.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_33.getSpeciationOrDuplicationEventsSum() != 7 ) {
+                return false;
+            }
+            if ( sdi2_33.getSpeciationsSum() != 3 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "a2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "c1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "d1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "x" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "p1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "i1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "k2" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "e1" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "y" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_33, "a1", "z" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_34 = TestGSDI
+                    .createPhylogeny( "(((n1_0[&&NHX:S=n1],n2_0[&&NHX:S=n2]),(n1_1[&&NHX:S=n1],n3_0[&&NHX:S=n3])),n4_0[&&NHX:S=n4])" );
+            final GSDI sdi2_34 = new GSDI( g2_34, s2, false );
+            if ( sdi2_34.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_34.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_34.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_34, "n1_0", "n2_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_34, "n1_1", "n3_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_34, "n1_0", "n1_1" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_34, "n1_0", "n4_0" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_35 = TestGSDI
+                    .createPhylogeny( "((((n1_0[&&NHX:S=n1],n2_0[&&NHX:S=n2]),(n1_1[&&NHX:S=n1],n3_0[&&NHX:S=n3])),n4_0[&&NHX:S=n4]),a1_0[&&NHX:S=a1])" );
+            final GSDI sdi2_35 = new GSDI( g2_35, s2, false );
+            if ( sdi2_35.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_35.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_35.getSpeciationsSum() != 3 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_35, "n1_0", "n2_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_35, "n1_1", "n3_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_35, "n1_0", "n1_1" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_35, "n1_0", "n4_0" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_35, "n1_0", "a1_0" ).isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g2_36 = TestGSDI
+                    .createPhylogeny( "(((a1_0[&&NHX:S=a1],b1_0[&&NHX:S=b1]),(a1_1[&&NHX:S=a1],c1_0[&&NHX:S=c1])),d1_0[&&NHX:S=d1])" );
+            final GSDI sdi2_36 = new GSDI( g2_36, s2, false );
+            if ( sdi2_36.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_36.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_36.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_36, "a1_0", "b1_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_36, "a1_1", "c1_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_36, "a1_0", "c1_0" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_36, "a1_0", "d1_0" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_37 = TestGSDI
+                    .createPhylogeny( "(((a1_0[&&NHX:S=a1],b1_0[&&NHX:S=b1]),(a2_0[&&NHX:S=a2],c1_0[&&NHX:S=c1])),d1_0[&&NHX:S=d1])" );
+            final GSDI sdi2_37 = new GSDI( g2_37, s2, false );
+            if ( sdi2_37.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_37.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_37.getSpeciationsSum() != 2 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_37, "a1_0", "b1_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_37, "a2_0", "c1_0" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_37, "a1_0", "c1_0" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_37, "a1_0", "d1_0" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
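+            // Gene tree in which every external node maps to the same species (n1):
+            // all five internal nodes have to be inferred as duplications.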
+            final Phylogeny g2_38 = TestGSDI
+                    .createPhylogeny( "(((([&&NHX:S=n1],[&&NHX:S=n1]),([&&NHX:S=n1],[&&NHX:S=n1])),[&&NHX:S=n1]),[&&NHX:S=n1])" );
+            final GSDI sdi2_38 = new GSDI( g2_38, s2, false );
+            if ( sdi2_38.getDuplicationsSum() != 5 ) {
+                return false;
+            }
+            if ( sdi2_38.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_38.getSpeciationsSum() != 0 ) {
+                return false;
+            }
+            final Phylogeny g2_100 = TestGSDI
+                    .createPhylogeny( "(((e1[&&NHX:S=e1],f2[&&NHX:S=f2]),(d3[&&NHX:S=d3],g4[&&NHX:S=g4])),(((a1[&&NHX:S=a1],h2[&&NHX:S=h2]),c3[&&NHX:S=c3]),(i4[&&NHX:S=i4],b1[&&NHX:S=b1])))" );
+            final GSDI sdi2_100 = new GSDI( g2_100, s2, false );
+            if ( sdi2_100.getDuplicationsSum() != 4 ) {
+                return false;
+            }
+            if ( sdi2_100.getSpeciationOrDuplicationEventsSum() != 0 ) {
+                return false;
+            }
+            if ( sdi2_100.getSpeciationsSum() != 4 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "e1", "f2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "d3", "g4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "e1", "d3" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "a1", "h2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "a1", "c3" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "i4", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "a1", "i4" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_100, "e1", "a1" ).isDuplication() ) {
+                return false;
+            }
+            final Phylogeny g2_101 = TestGSDI
+                    .createPhylogeny( "(((e1[&&NHX:S=e1],f2[&&NHX:S=f2]),(d3[&&NHX:S=d3],g4[&&NHX:S=g4])),(((a1[&&NHX:S=a1],b2[&&NHX:S=b2]),c3[&&NHX:S=c3]),(i4[&&NHX:S=i4],j1[&&NHX:S=j1])))" );
+            final GSDI sdi2_101 = new GSDI( g2_101, s2, false );
+            if ( sdi2_101.getDuplicationsSum() != 2 ) {
+                return false;
+            }
+            if ( sdi2_101.getSpeciationOrDuplicationEventsSum() != 1 ) {
+                return false;
+            }
+            if ( sdi2_101.getSpeciationsSum() != 5 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "e1", "f2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "d3", "g4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "e1", "d3" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "a1", "b2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "a1", "c3" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "i4", "j1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "a1", "i4" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g2_101, "e1", "a1" ).isDuplication() ) {
+                return false;
+            }
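+            // The following cases use a balanced species tree created by
+            // DevelopmentTools.createBalancedPhylogeny( 7, 4 ), with its species
+            // numbered in order via DevelopmentTools.numberSpeciesInOrder.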
+            final Phylogeny s_7_4 = DevelopmentTools.createBalancedPhylogeny( 7, 4 );
+            DevelopmentTools.numberSpeciesInOrder( s_7_4 );
+            final Phylogeny g_7_4_1 = TestGSDI
+                    .createPhylogeny( "(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((("
+                            + "1[&&NHX:S=1],2[&&NHX:S=2]),3[&&NHX:S=3]),4[&&NHX:S=4]),5[&&NHX:S=5]),"
+                            + "6[&&NHX:S=6]),7[&&NHX:S=7]),8[&&NHX:S=8]),9[&&NHX:S=9]),10[&&NHX:S=10]),11[&&NHX:S=11]),"
+                            + "12[&&NHX:S=12]),13[&&NHX:S=13]),14[&&NHX:S=14]),15[&&NHX:S=15]),16[&&NHX:S=16]),17[&&NHX:S=17]),"
+                            + "18[&&NHX:S=18]),19[&&NHX:S=19]),20[&&NHX:S=20]),21[&&NHX:S=21]),22[&&NHX:S=22]),23[&&NHX:S=23]),"
+                            + "24[&&NHX:S=24]),25[&&NHX:S=25]),26[&&NHX:S=26]),27[&&NHX:S=27]),28[&&NHX:S=28]),29[&&NHX:S=29]),"
+                            + "30[&&NHX:S=30]),31[&&NHX:S=31]),32[&&NHX:S=32]),33[&&NHX:S=33]),34[&&NHX:S=34]),35[&&NHX:S=35]),"
+                            + "36[&&NHX:S=36]),37[&&NHX:S=37]),38[&&NHX:S=38]),39[&&NHX:S=39]),40[&&NHX:S=40]),41[&&NHX:S=41]),"
+                            + "42[&&NHX:S=42]),43[&&NHX:S=43]),44[&&NHX:S=44]),45[&&NHX:S=45]),46[&&NHX:S=46]),47[&&NHX:S=47]),"
+                            + "48[&&NHX:S=48]),49[&&NHX:S=49]),50[&&NHX:S=50]),51[&&NHX:S=51]),52[&&NHX:S=52]),53[&&NHX:S=53]),"
+                            + "54[&&NHX:S=54]),55[&&NHX:S=55]),56[&&NHX:S=56]),57[&&NHX:S=57]),58[&&NHX:S=58]),59[&&NHX:S=59]),"
+                            + "60[&&NHX:S=60]),61[&&NHX:S=61]),62[&&NHX:S=62]),63[&&NHX:S=63]),64[&&NHX:S=64]),65[&&NHX:S=65])" );
+            final GSDI sdi7_4_1 = new GSDI( g_7_4_1, s_7_4, false );
+            if ( sdi7_4_1.getDuplicationsSum() != 54 ) {
+                return false;
+            }
+            if ( sdi7_4_1.getSpeciationOrDuplicationEventsSum() != 6 ) {
+                return false;
+            }
+            if ( sdi7_4_1.getSpeciationsSum() != 4 ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "3" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "4" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "5" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "6" ).isDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "9" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "13" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "17" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "33" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "49" ).isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !TestGSDI.getEvent( g_7_4_1, "1", "65" ).isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny g_7_4_2 = TestGSDI
+                    .createPhylogeny( "((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((("
+                            + "1[&&NHX:S=1],2[&&NHX:S=2]),3[&&NHX:S=3]),4[&&NHX:S=4]),5[&&NHX:S=5]),"
+                            + "6[&&NHX:S=6]),7[&&NHX:S=7]),8[&&NHX:S=8]),9[&&NHX:S=9]),10[&&NHX:S=10]),11[&&NHX:S=11]),"
+                            + "12[&&NHX:S=12]),13[&&NHX:S=13]),14[&&NHX:S=14]),15[&&NHX:S=15]),16[&&NHX:S=16]),17[&&NHX:S=17]),"
+                            + "18[&&NHX:S=18]),19[&&NHX:S=19]),20[&&NHX:S=20]),21[&&NHX:S=21]),22[&&NHX:S=22]),23[&&NHX:S=23]),"
+                            + "24[&&NHX:S=24]),25[&&NHX:S=25]),26[&&NHX:S=26]),27[&&NHX:S=27]),28[&&NHX:S=28]),29[&&NHX:S=29]),"
+                            + "30[&&NHX:S=30]),31[&&NHX:S=31]),32[&&NHX:S=32]),33[&&NHX:S=33]),34[&&NHX:S=34]),35[&&NHX:S=35]),"
+                            + "36[&&NHX:S=36]),37[&&NHX:S=37]),38[&&NHX:S=38]),39[&&NHX:S=39]),40[&&NHX:S=40]),41[&&NHX:S=41]),"
+                            + "42[&&NHX:S=42]),43[&&NHX:S=43]),44[&&NHX:S=44]),45[&&NHX:S=45]),46[&&NHX:S=46]),47[&&NHX:S=47]),"
+                            + "48[&&NHX:S=48]),49[&&NHX:S=49]),50[&&NHX:S=50]),51[&&NHX:S=51]),52[&&NHX:S=52]),53[&&NHX:S=53]),"
+                            + "54[&&NHX:S=54]),55[&&NHX:S=55]),56[&&NHX:S=56]),57[&&NHX:S=57]),58[&&NHX:S=58]),59[&&NHX:S=59]),"
+                            + "60[&&NHX:S=60]),61[&&NHX:S=61]),62[&&NHX:S=62]),63[&&NHX:S=63]),64[&&NHX:S=64]),65[&&NHX:S=65]),"
+                            + "66[&&NHX:S=66]),257[&&NHX:S=257]),258[&&NHX:S=258]),513[&&NHX:S=513]),514[&&NHX:S=514]),769[&&NHX:S=769]),770[&&NHX:S=770])" );
+            final GSDI sdi7_4_2 = new GSDI( g_7_4_2, s_7_4, false );
+            if ( sdi7_4_2.getDuplicationsSum() != 58 ) {
+                return false;
+            }
+            if ( sdi7_4_2.getSpeciationOrDuplicationEventsSum() != 8 ) {
+                return false;
+            }
+            if ( sdi7_4_2.getSpeciationsSum() != 5 ) {
+                return false;
+            }
+            // final String g2_0_ =
+            // "(([&&NHX:S=a1],[&&NHX:S=a2]),([&&NHX:S=o2],[&&NHX:S=o4]))";
+            // final Phylogeny g2_0 = factory.create( g2_0_, new NHXParser() )[
+            // 0 ];
+            // g2_0.setRooted( true );
+            // final GSDI sdi2_0 = new GSDI( g2_0, s2, false );
+            // if ( sdi2_0.getDuplicationsSum() != 0 ) {
+            // return false;
+            // }
+            // final String g2_1_= "";
+            // final Phylogeny g2_1 = factory.create( g2_1_, new NHXParser() )[
+            // 0 ];
+            // g2_1.setRooted( true );
+            // final GSDI sdi2_1 = new GSDI( g2_1, s2, false );
+            // if ( sdi2_1.getDuplicationsSum() != 0 ) {
+            // return false;
+            // }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+}
diff --git a/forester/java/src/org/forester/sdi/Tuplet.java b/forester/java/src/org/forester/sdi/Tuplet.java
new file mode 100644 (file)
index 0000000..37426e4
--- /dev/null
@@ -0,0 +1,168 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2000-2001 Washington University School of Medicine
+// and Howard Hughes Medical Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sdi;
+
+class Tuplet implements Comparable<Tuplet> {
+
+    public static final int DEFAULT = -999;
+    private final String    _key;
+    private final double    _value1;
+    private final double    _value2;
+    private final double    _value3;
+    private final double    _value4;
+    private int[]           _p;            // comparison signs, +1 or -1, per value column
+
+    Tuplet() {
+        setSigns();
+        _key = "";
+        _value1 = Tuplet.DEFAULT;
+        _value2 = Tuplet.DEFAULT;
+        _value3 = Tuplet.DEFAULT;
+        _value4 = Tuplet.DEFAULT;
+    }
+
+    // Since the distance needs to be sorted in a different direction than the
+    // other values, and it is not known which value will be the distance, the
+    // constructors take an index 'c' marking the column whose comparison sign
+    // is flipped (see setSigns() and compareTo()).
+    Tuplet( final String name,
+            final double value1,
+            final double value2,
+            final double value3,
+            final double value4,
+            final int c ) {
+        setSigns();
+        _key = name;
+        _value1 = value1;
+        _value2 = value2;
+        _value3 = value3;
+        _value4 = value4;
+        if ( ( c >= 0 ) && ( c <= 3 ) ) {
+            _p[ c ] = -1;
+        }
+    }
+
+    Tuplet( final String name, final double value1, final double value2, final double value3, final int c ) {
+        setSigns();
+        _key = name;
+        _value1 = value1;
+        _value2 = value2;
+        _value3 = value3;
+        _value4 = Tuplet.DEFAULT;
+        if ( ( c >= 0 ) && ( c <= 2 ) ) {
+            _p[ c ] = -1;
+        }
+    }
+
+    Tuplet( final String name, final double value1, final double value2, final int c ) {
+        setSigns();
+        _key = name;
+        _value1 = value1;
+        _value2 = value2;
+        _value3 = Tuplet.DEFAULT;
+        _value4 = Tuplet.DEFAULT;
+        if ( ( c >= 0 ) && ( c <= 1 ) ) {
+            _p[ c ] = -1;
+        }
+    }
+
+    Tuplet( final String name, final double value1, final int c ) {
+        setSigns();
+        _key = name;
+        _value1 = value1;
+        _value2 = Tuplet.DEFAULT;
+        _value3 = Tuplet.DEFAULT;
+        _value4 = Tuplet.DEFAULT;
+        if ( c == 0 ) {
+            _p[ 0 ] = -1;
+        }
+    }
+
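+    // Compares by value1 through value4 in order, skipping DEFAULT placeholders,
+    // with the key string as the final tie-breaker. By default larger values sort
+    // first; the column marked by 'c' in the constructors has its sign in _p
+    // flipped to -1, so that column (e.g. a distance) sorts in ascending order
+    // instead. Note that the signs of 'this' are used, so tuplets being compared
+    // should have been created with the same column index c.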
+    public int compareTo( final Tuplet n ) {
+        if ( ( getValue1() != Tuplet.DEFAULT ) && ( n.getValue1() != Tuplet.DEFAULT ) ) {
+            if ( getValue1() < n.getValue1() ) {
+                return _p[ 0 ];
+            }
+            if ( getValue1() > n.getValue1() ) {
+                return ( -_p[ 0 ] );
+            }
+        }
+        if ( ( getValue2() != Tuplet.DEFAULT ) && ( n.getValue2() != Tuplet.DEFAULT ) ) {
+            if ( getValue2() < n.getValue2() ) {
+                return _p[ 1 ];
+            }
+            if ( getValue2() > n.getValue2() ) {
+                return ( -_p[ 1 ] );
+            }
+        }
+        if ( ( getValue3() != Tuplet.DEFAULT ) && ( n.getValue3() != Tuplet.DEFAULT ) ) {
+            if ( getValue3() < n.getValue3() ) {
+                return _p[ 2 ];
+            }
+            if ( getValue3() > n.getValue3() ) {
+                return ( -_p[ 2 ] );
+            }
+        }
+        if ( ( getValue4() != Tuplet.DEFAULT ) && ( n.getValue4() != Tuplet.DEFAULT ) ) {
+            if ( getValue4() < n.getValue4() ) {
+                return _p[ 3 ];
+            }
+            if ( getValue4() > n.getValue4() ) {
+                return ( -_p[ 3 ] );
+            }
+        }
+        return ( getKey().compareTo( n.getKey() ) );
+    }
+
+    String getKey() {
+        return _key;
+    }
+
+    double getValue1() {
+        return _value1;
+    }
+
+    double getValue2() {
+        return _value2;
+    }
+
+    double getValue3() {
+        return _value3;
+    }
+
+    double getValue4() {
+        return _value4;
+    }
+
+    private void setSigns() {
+        _p = new int[ 4 ];
+        _p[ 0 ] = _p[ 1 ] = _p[ 2 ] = _p[ 3 ] = +1;
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/sequence/BasicSequence.java b/forester/java/src/org/forester/sequence/BasicSequence.java
new file mode 100644 (file)
index 0000000..4cc03a7
--- /dev/null
@@ -0,0 +1,91 @@
+// $Id:
+//
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sequence;
+
+public class BasicSequence implements Sequence {
+
+    private final char[] _mol_sequence;
+    private final Object _identifier;
+    private final TYPE   _type;
+
+    private BasicSequence( final Object identifier, final String mol_sequence, final TYPE type ) {
+        _mol_sequence = mol_sequence.toCharArray();
+        _identifier = identifier;
+        _type = type;
+    }
+
+    // Only use if you know what you are doing!
+    public BasicSequence( final Object identifier, final char[] mol_sequence, final TYPE type ) {
+        _mol_sequence = mol_sequence;
+        _identifier = identifier;
+        _type = type;
+    }
+
+    public Object getIdentifier() {
+        return _identifier;
+    }
+
+    public int getLength() {
+        return _mol_sequence.length;
+    }
+
+    public char[] getMolecularSequence() {
+        return _mol_sequence;
+    }
+
+    public char getResidueAt( final int position ) {
+        return _mol_sequence[ position ];
+    }
+
+    public TYPE getType() {
+        return _type;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( _identifier.toString() );
+        sb.append( " " );
+        sb.append( new String( _mol_sequence ) );
+        return sb.toString();
+    }
+
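+    // The factory methods below normalize their input: the string is upper-cased,
+    // '.' gap characters are converted to '-' (GAP_STR), and any character outside
+    // the respective alphabet is replaced by UNSPECIFIED_AA ('X') or UNSPECIFIED_NUC
+    // ('N'). For example, createDnaSequence( "seq1", "acg.tq" ) stores "ACG-TN".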
+    public static Sequence createAaSequence( final Object identifier, final String mol_sequence ) {
+        return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
+                .replaceAll( AA_REGEXP, Character.toString( UNSPECIFIED_AA ) ), TYPE.AA );
+    }
+
+    public static Sequence createDnaSequence( final Object identifier, final String mol_sequence ) {
+        return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
+                .replaceAll( DNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.DNA );
+    }
+
+    public static Sequence createRnaSequence( final Object identifier, final String mol_sequence ) {
+        return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
+                .replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA );
+    }
+}
diff --git a/forester/java/src/org/forester/sequence/Sequence.java b/forester/java/src/org/forester/sequence/Sequence.java
new file mode 100644 (file)
index 0000000..3ee5893
--- /dev/null
@@ -0,0 +1,53 @@
+// $Id:
+//
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.sequence;
+
+public interface Sequence {
+
+    public static final char   UNSPECIFIED_AA  = 'X';
+    public static final char   UNSPECIFIED_NUC = 'N';
+    public static final char   GAP             = '-';
+    public static final String GAP_STR         = Character.toString( GAP );
+    public static final char   TERMINATE       = '*';
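+    // Negated character classes: each pattern matches any character that is NOT
+    // part of the corresponding alphabet; BasicSequence uses them to replace such
+    // characters with UNSPECIFIED_AA or UNSPECIFIED_NUC.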
+    static final String        AA_REGEXP       = "[^ARNDBCQEZGHILKMFPSTWYVXU\\-\\*]";
+    static final String        DNA_REGEXP      = "[^ACGTRYMKWSN\\-\\*]";
+    static final String        RNA_REGEXP      = "[^ACGURYMKWSN\\-\\*]";
+
+    public abstract Object getIdentifier();
+
+    public abstract int getLength();
+
+    public abstract char[] getMolecularSequence();
+
+    public abstract char getResidueAt( final int position );
+
+    public abstract TYPE getType();
+
+    public enum TYPE {
+        RNA, DNA, AA;
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java
new file mode 100644 (file)
index 0000000..43bb0e3
--- /dev/null
@@ -0,0 +1,54 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public class AdjactantDirectedBinaryDomainCombination extends BasicBinaryDomainCombination {
+
+    public AdjactantDirectedBinaryDomainCombination( final DomainId n_terminal, final DomainId c_terminal ) {
+        super();
+        if ( ( n_terminal == null ) || ( c_terminal == null ) ) {
+            throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
+        }
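+        // Unlike BasicBinaryDomainCombination, the ids are not sorted: _id_0 is
+        // always the N-terminal domain and _id_1 the C-terminal one, i.e. the
+        // combination is directed.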
+        _id_0 = n_terminal;
+        _id_1 = c_terminal;
+    }
+
+    public AdjactantDirectedBinaryDomainCombination( final String n_terminal, final String c_terminal ) {
+        this( new DomainId( n_terminal ), new DomainId( c_terminal ) );
+    }
+
+    public static AdjactantDirectedBinaryDomainCombination createInstance( final String ids ) {
+        if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) {
+            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
+        }
+        final String[] ids_ary = ids.split( BinaryDomainCombination.SEPARATOR );
+        if ( ids_ary.length != 2 ) {
+            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
+        }
+        return new AdjactantDirectedBinaryDomainCombination( ids_ary[ 0 ], ids_ary[ 1 ] );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/AdjactantDirectedCombinableDomains.java b/forester/java/src/org/forester/surfacing/AdjactantDirectedCombinableDomains.java
new file mode 100644 (file)
index 0000000..adfd02c
--- /dev/null
@@ -0,0 +1,49 @@
+// $Id:
+// cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class AdjactantDirectedCombinableDomains extends BasicCombinableDomains {
+
+    public AdjactantDirectedCombinableDomains( final DomainId n_terminal_key_domain, final Species species ) {
+        super( n_terminal_key_domain, species );
+    }
+
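+    // Produces one directed combination (key domain as N-terminal partner) per
+    // distinct combining domain; the occurrence counts are not used here.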
+    @Override
+    public List<BinaryDomainCombination> toBinaryDomainCombinations() {
+        final List<BinaryDomainCombination> binary_combinations = new ArrayList<BinaryDomainCombination>( getNumberOfCombinableDomains() );
+        for( final DomainId domain : getCombiningDomains().keySet() ) {
+            // Precondition (!): the key domain is the most upstream (N-terminal) domain.
+            // TODO: ensure this is true.
+            binary_combinations.add( new AdjactantDirectedBinaryDomainCombination( getKeyDomain(), domain ) );
+        }
+        return binary_combinations;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java
new file mode 100644 (file)
index 0000000..9436ce9
--- /dev/null
@@ -0,0 +1,170 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.util.ForesterUtil;
+
+public class BasicBinaryDomainCombination implements BinaryDomainCombination {
+
+    DomainId _id_0;
+    DomainId _id_1;
+
+    BasicBinaryDomainCombination() {
+        _id_0 = null;
+        _id_1 = null;
+    }
+
+    public BasicBinaryDomainCombination( final DomainId id_0, final DomainId id_1 ) {
+        if ( ( id_0 == null ) || ( id_1 == null ) ) {
+            throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
+        }
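+        // Store the two ids in sorted order, so that the combination is
+        // direction-independent: (a,b) and (b,a) are equal and hash alike.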
+        if ( id_0.compareTo( id_1 ) < 0 ) {
+            _id_0 = id_0;
+            _id_1 = id_1;
+        }
+        else {
+            _id_0 = id_1;
+            _id_1 = id_0;
+        }
+    }
+
+    public BasicBinaryDomainCombination( final String id_0, final String id_1 ) {
+        this( new DomainId( id_0 ), new DomainId( id_1 ) );
+    }
+
+    @Override
+    public int compareTo( final BinaryDomainCombination binary_domain_combination ) {
+        if ( binary_domain_combination.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to compare [" + binary_domain_combination.getClass() + "] to "
+                    + "[" + this.getClass() + "]" );
+        }
+        if ( equals( binary_domain_combination ) ) {
+            return 0;
+        }
+        final int x = getId0().compareTo( binary_domain_combination.getId0() );
+        if ( x != 0 ) {
+            return x;
+        }
+        else {
+            return getId1().compareTo( binary_domain_combination.getId1() );
+        }
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return ( getId0().equals( ( ( BinaryDomainCombination ) o ).getId0() ) )
+                    && ( getId1().equals( ( ( BinaryDomainCombination ) o ).getId1() ) );
+        }
+    }
+
+    public DomainId getId0() {
+        return _id_0;
+    }
+
+    public DomainId getId1() {
+        return _id_1;
+    }
+
+    @Override
+    public int hashCode() {
+        return getId0().hashCode() + ( 19 * getId1().hashCode() );
+    }
+
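+    // Renders this combination in Graphviz DOT syntax: an edge statement
+    // "id0 -- id1" followed by the given edge attribute (if any), optionally
+    // preceded by one node statement per id when a node attribute is supplied.
+    // DOT is the only OutputFormat handled here.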
+    public StringBuffer toGraphDescribingLanguage( final OutputFormat format,
+                                                   final String node_attribute,
+                                                   final String edge_attribute ) {
+        final StringBuffer sb = new StringBuffer();
+        switch ( format ) {
+            case DOT:
+                if ( ForesterUtil.isEmpty( node_attribute ) ) {
+                    sb.append( getId0() );
+                    sb.append( " -- " );
+                    sb.append( getId1() );
+                    if ( !ForesterUtil.isEmpty( edge_attribute ) ) {
+                        sb.append( " " );
+                        sb.append( edge_attribute );
+                    }
+                    sb.append( ";" );
+                }
+                else {
+                    sb.append( getId0() );
+                    sb.append( " " );
+                    sb.append( node_attribute );
+                    sb.append( ";" );
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                    sb.append( getId1() );
+                    sb.append( " " );
+                    sb.append( node_attribute );
+                    sb.append( ";" );
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                    sb.append( getId0() );
+                    sb.append( " -- " );
+                    sb.append( getId1() );
+                    if ( !ForesterUtil.isEmpty( edge_attribute ) ) {
+                        sb.append( " " );
+                        sb.append( edge_attribute );
+                    }
+                    sb.append( ";" );
+                }
+                break;
+            default:
+                throw new AssertionError( "unknown format:" + format );
+        }
+        return sb;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getId0() );
+        sb.append( BinaryDomainCombination.SEPARATOR );
+        sb.append( getId1() );
+        return sb.toString();
+    }
+
+    public static BinaryDomainCombination createInstance( final String ids ) {
+        if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) {
+            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
+        }
+        final String[] ids_ary = ids.split( BinaryDomainCombination.SEPARATOR );
+        if ( ids_ary.length != 2 ) {
+            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
+        }
+        return new BasicBinaryDomainCombination( ids_ary[ 0 ], ids_ary[ 1 ] );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicCombinableDomains.java
new file mode 100644 (file)
index 0000000..9f108cc
--- /dev/null
@@ -0,0 +1,185 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.util.DescriptiveStatistics;
+
+public class BasicCombinableDomains implements CombinableDomains {
+
+    final private DomainId                   _key_domain;
+    private int                              _key_domain_count;
+    private int                              _key_domain_proteins_count;
+    final private Species                    _species;
+    final private TreeMap<DomainId, Integer> _combining_domains;
+    private DescriptiveStatistics            _key_domain_confidence_statistics;
+
+    public BasicCombinableDomains( final DomainId key_domain, final Species species ) {
+        _key_domain = key_domain;
+        _species = species;
+        _combining_domains = new TreeMap<DomainId, Integer>();
+        init();
+    }
+
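+    // Adds the given domain to the combining domains, incrementing its count if it is already present.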
+    public void addCombinableDomain( final DomainId protein_domain ) {
+        if ( getCombiningDomains().containsKey( protein_domain ) ) {
+            getCombiningDomains().put( protein_domain, getCombiningDomains().get( protein_domain ) + 1 );
+        }
+        else {
+            getCombiningDomains().put( protein_domain, 1 );
+        }
+    }
+
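+    // Returns the combinable domains plus the key domain itself (added if not already present).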
+    public List<DomainId> getAllDomains() {
+        final List<DomainId> domains = getCombinableDomains();
+        if ( !domains.contains( getKeyDomain() ) ) {
+            domains.add( getKeyDomain() );
+        }
+        return domains;
+    }
+
+    public List<DomainId> getCombinableDomains() {
+        final List<DomainId> domains = new ArrayList<DomainId>( getNumberOfCombinableDomains() );
+        for( final DomainId domain : getCombiningDomains().keySet() ) {
+            domains.add( domain );
+        }
+        return domains;
+    }
+
+    public SortedMap<DomainId, Integer> getCombinableDomainsIds() {
+        final SortedMap<DomainId, Integer> ids = new TreeMap<DomainId, Integer>();
+        for( final DomainId domain : getCombiningDomains().keySet() ) {
+            final DomainId pd = domain;
+            ids.put( pd, getCombiningDomains().get( pd ) );
+        }
+        return ids;
+    }
+
+    public StringBuilder getCombiningDomainIdsAsStringBuilder() {
+        final StringBuilder sb = new StringBuilder();
+        for( final Iterator<DomainId> iter = getCombiningDomains().keySet().iterator(); iter.hasNext(); ) {
+            final DomainId key = iter.next();
+            sb.append( key.toString() );
+            sb.append( " [" );
+            final int count = getCombiningDomains().get( key );
+            sb.append( count );
+            sb.append( "]" );
+            if ( iter.hasNext() ) {
+                sb.append( ", " );
+            }
+        }
+        return sb;
+    }
+
+    protected TreeMap<DomainId, Integer> getCombiningDomains() {
+        return _combining_domains;
+    }
+
+    public DomainId getKeyDomain() {
+        return _key_domain;
+    }
+
+    public DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() {
+        return _key_domain_confidence_statistics;
+    }
+
+    public int getKeyDomainCount() {
+        return _key_domain_count;
+    }
+
+    public int getKeyDomainProteinsCount() {
+        return _key_domain_proteins_count;
+    }
+
+    public int getNumberOfCombinableDomains() {
+        return _combining_domains.size();
+    }
+
+    public int getNumberOfProteinsExhibitingCombination( final DomainId protein_domain ) {
+        if ( getCombiningDomains().containsKey( protein_domain ) ) {
+            return getCombiningDomains().get( protein_domain );
+        }
+        else {
+            return 0;
+        }
+    }
+
+    public Species getSpecies() {
+        return _species;
+    }
+
+    private void init() {
+        _key_domain_count = 0;
+        _key_domain_proteins_count = 0;
+        _key_domain_confidence_statistics = null;
+    }
+
+    public boolean isCombinable( final DomainId protein_domain ) {
+        return getCombiningDomains().containsKey( protein_domain );
+    }
+
+    public void setKeyDomainConfidenceDescriptiveStatistics( final DescriptiveStatistics key_domain_confidence_statistics ) {
+        _key_domain_confidence_statistics = key_domain_confidence_statistics;
+    }
+
+    public void setKeyDomainCount( final int key_domain_count ) {
+        _key_domain_count = key_domain_count;
+    }
+
+    public void setKeyDomainProteinsCount( final int key_domain_proteins_count ) {
+        _key_domain_proteins_count = key_domain_proteins_count;
+    }
+
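+    // Expands this object into one binary combination (key domain, combining domain) per combining domain.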
+    @Override
+    public List<BinaryDomainCombination> toBinaryDomainCombinations() {
+        final List<BinaryDomainCombination> binary_combinations = new ArrayList<BinaryDomainCombination>( getNumberOfCombinableDomains() );
+        for( final DomainId domain : getCombiningDomains().keySet() ) {
+            binary_combinations.add( new BasicBinaryDomainCombination( getKeyDomain(), domain ) );
+        }
+        return binary_combinations;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( getKeyDomain() );
+        sb.append( " [" );
+        sb.append( getKeyDomainCount() );
+        sb.append( ", " );
+        sb.append( getKeyDomainProteinsCount() );
+        sb.append( ", " );
+        sb.append( getNumberOfCombinableDomains() );
+        sb.append( "]: " );
+        sb.append( getCombiningDomainIdsAsStringBuilder() );
+        return sb.toString();
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicDomain.java b/forester/java/src/org/forester/surfacing/BasicDomain.java
new file mode 100644 (file)
index 0000000..a5a2da5
--- /dev/null
@@ -0,0 +1,224 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.go.GoId;
+import org.forester.util.ForesterUtil;
+
+public class BasicDomain implements Domain {
+
+    final private DomainId _id;
+    final private int      _from;
+    final private int      _to;
+    final private short    _number;
+    final private short    _total_count;
+    final private double   _per_sequence_evalue;
+    final private double   _per_sequence_score;
+    final private double   _per_domain_evalue;
+    final private double   _per_domain_score;
+
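+    // Creates a domain carrying only an identifier; positions, counts, E-values and scores are set to -1.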
+    public BasicDomain( final String id_str ) {
+        if ( ForesterUtil.isEmpty( id_str ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
+        }
+        _id = new DomainId( id_str );
+        _from = -1;
+        _to = -1;
+        _number = -1;
+        _total_count = -1;
+        _per_sequence_evalue = -1;
+        _per_sequence_score = -1;
+        _per_domain_evalue = -1;
+        _per_domain_score = -1;
+    }
+
+    public BasicDomain( final String id_str,
+                        final int from,
+                        final int to,
+                        final short number,
+                        final short total_count,
+                        final double per_sequence_evalue,
+                        final double per_sequence_score ) {
+        if ( ( from >= to ) || ( from < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain from " + from + " to " + to );
+        }
+        if ( ForesterUtil.isEmpty( id_str ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
+        }
+        if ( ( number > total_count ) || ( number < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain number " + number + " out of "
+                    + total_count );
+        }
+        if ( per_sequence_evalue < 0.0 ) {
+            throw new IllegalArgumentException( "attempt to create protein domain with E-value" );
+        }
+        _id = new DomainId( id_str );
+        _from = from;
+        _to = to;
+        _number = number;
+        _total_count = total_count;
+        _per_sequence_evalue = per_sequence_evalue;
+        _per_sequence_score = per_sequence_score;
+        _per_domain_evalue = -1;
+        _per_domain_score = -1;
+    }
+
+    public BasicDomain( final String id_str,
+                        final int from,
+                        final int to,
+                        final short number,
+                        final short total_count,
+                        final double per_sequence_evalue,
+                        final double per_sequence_score,
+                        final double per_domain_evalue,
+                        final double per_domain_score ) {
+        if ( ( from >= to ) || ( from < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain from " + from + " to " + to );
+        }
+        if ( ForesterUtil.isEmpty( id_str ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
+        }
+        if ( ( number > total_count ) || ( number < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain number " + number + " out of "
+                    + total_count );
+        }
+        if ( ( per_sequence_evalue < 0.0 ) || ( per_domain_evalue < 0.0 ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain with E-value" );
+        }
+        _id = new DomainId( id_str );
+        _from = from;
+        _to = to;
+        _number = number;
+        _total_count = total_count;
+        _per_sequence_evalue = per_sequence_evalue;
+        _per_sequence_score = per_sequence_score;
+        _per_domain_evalue = per_domain_evalue;
+        _per_domain_score = per_domain_score;
+    }
+
+    public void addGoId( final GoId go_id ) {
+        getDomainId().getGoIds().add( go_id );
+    }
+
+    /**
+     * Basic domains are compared/sorted based upon their identifiers (case
+     * insensitive).
+     */
+    public int compareTo( final Domain domain ) {
+        if ( domain.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to compare [" + domain.getClass() + "] to " + "["
+                    + this.getClass() + "]" );
+        }
+        if ( this == domain ) {
+            return 0;
+        }
+        return getDomainId().compareTo( domain.getDomainId() );
+    }
+
+    /**
+     * Basic domains are considered equal if they have the same identifier (case
+     * sensitive).
+     * 
+     */
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return getDomainId().equals( ( ( Domain ) o ).getDomainId() );
+        }
+    }
+
+    public DomainId getDomainId() {
+        return _id;
+    }
+
+    public int getFrom() {
+        return _from;
+    }
+
+    public GoId getGoId( final int i ) {
+        return getDomainId().getGoIds().get( i );
+    }
+
+    public short getNumber() {
+        return _number;
+    }
+
+    public int getNumberOfGoIds() {
+        return getDomainId().getGoIds().size();
+    }
+
+    @Override
+    public double getPerDomainEvalue() {
+        return _per_domain_evalue;
+    }
+
+    @Override
+    public double getPerDomainScore() {
+        return _per_domain_score;
+    }
+
+    public double getPerSequenceEvalue() {
+        return _per_sequence_evalue;
+    }
+
+    public double getPerSequenceScore() {
+        return _per_sequence_score;
+    }
+
+    public int getTo() {
+        return _to;
+    }
+
+    public short getTotalCount() {
+        return _total_count;
+    }
+
+    @Override
+    public int hashCode() {
+        return getDomainId().getId().hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return toStringBuffer().toString();
+    }
+
+    public StringBuffer toStringBuffer() {
+        return new StringBuffer( getDomainId().getId() );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..5b5a91e
--- /dev/null
@@ -0,0 +1,242 @@
+// $Id:
+// Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+
+public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator {
+
+    final DomainSimilarity.DomainSimilaritySortField _sort;
+    private final boolean                            _sort_by_species_count_first;
+    private final boolean                            _treat_as_binary_comparison;
+
+    public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
+                                            final boolean sort_by_species_count_first,
+                                            final boolean treat_as_binary_comparison ) {
+        _sort = sort;
+        _sort_by_species_count_first = sort_by_species_count_first;
+        _treat_as_binary_comparison = treat_as_binary_comparison;
+    }
+
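+    // For every domain id present in any of the genome-wide collections, gathers the per-genome
+    // combinable domains with that key and calculates a DomainSimilarity; optionally skips keys
+    // without combinations in any genome and keys present in only one genome.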
+    public SortedSet<DomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                              final List<GenomeWideCombinableDomains> cdc_list,
+                                                              final boolean ignore_domains_without_combinations_in_any_genome,
+                                                              final boolean ignore_domains_specific_to_one_genome ) {
+        if ( cdc_list.size() < 2 ) {
+            throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" );
+        }
+        final SortedSet<DomainSimilarity> similarities = new TreeSet<DomainSimilarity>();
+        final SortedSet<DomainId> keys = new TreeSet<DomainId>();
+        for( final GenomeWideCombinableDomains cdc : cdc_list ) {
+            keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() );
+        }
+        for( final DomainId key : keys ) {
+            final List<CombinableDomains> same_id_cd_list = new ArrayList<CombinableDomains>( cdc_list.size() );
+            final List<Species> species_with_key_id_domain = new ArrayList<Species>();
+            for( final GenomeWideCombinableDomains cdc : cdc_list ) {
+                if ( cdc.contains( key ) ) {
+                    same_id_cd_list.add( cdc.get( key ) );
+                    species_with_key_id_domain.add( cdc.getSpecies() );
+                }
+            }
+            if ( ignore_domains_without_combinations_in_any_genome ) { // TODO: test me
+                boolean without_combinations = true;
+                for( final CombinableDomains cd : same_id_cd_list ) {
+                    if ( cd.getNumberOfCombinableDomains() > 0 ) {
+                        without_combinations = false;
+                        break;
+                    }
+                }
+                if ( without_combinations ) {
+                    continue;
+                }
+            }
+            // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
+            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+            // OLD: if ( same_id_cd_list.size() > 1 ) {
+            if ( same_id_cd_list.size() > 0 ) {
+                if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) {
+                    final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
+                    if ( s != null ) {
+                        similarities.add( s );
+                    }
+                    else {
+                        throw new RuntimeException( "similarity is null: this should not have happened" );
+                    }
+                }
+            }
+            // ~~~ NEW:
+            else {
+                throw new RuntimeException( "this should not have happened" );
+            }
+            // ~~~ OLD:
+            // else if ( same_id_cd_list.size() == 1 ) {
+            // TODO need to go in file
+            // System.out.println( "only in one species [" +
+            // species_with_key_id_domain.get( 0 ) + "]: " + key_id );
+            //}
+            //else {
+            //    throw new RuntimeException( "this should not have happened" );
+            // }
+        }
+        return similarities;
+    }
+
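+    // Summarizes all pairwise similarity scores between the given combinable domains objects with
+    // descriptive statistics; a single-entry list yields a trivial similarity for that genome alone.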
+    private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                  final List<CombinableDomains> domains_list ) {
+        if ( domains_list.size() == 1 ) {
+            // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
+            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+            // ~~~OLD:
+            //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" );
+            // ~~~new: 
+            final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data = new TreeMap<Species, SpeciesSpecificDomainSimilariyData>();
+            species_data.put( domains_list.get( 0 ).getSpecies(),
+                              createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
+            return new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                  1.0,
+                                                  1.0,
+                                                  1.0,
+                                                  1.0,
+                                                  0.0,
+                                                  0,
+                                                  0,
+                                                  0,
+                                                  species_data,
+                                                  getSort(),
+                                                  isSortBySpeciesCountFirst(),
+                                                  isTreatAsBinaryComparison() );
+        }
+        final DescriptiveStatistics stat = new BasicDescriptiveStatistics();
+        final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data = new TreeMap<Species, SpeciesSpecificDomainSimilariyData>();
+        species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list
+                .get( 0 ) ) );
+        int max_difference_in_counts = 0;
+        int max_difference = 0;
+        final boolean is_domain_combination_based = pairwise_calculator instanceof CombinationsBasedPairwiseDomainSimilarityCalculator;
+        for( int i = 1; i < domains_list.size(); ++i ) {
+            species_data.put( domains_list.get( i ).getSpecies(),
+                              createSpeciesSpecificDomainSimilariyData( domains_list.get( i ) ) );
+            final CombinableDomains domains_i = domains_list.get( i );
+            for( int j = 0; j < i; ++j ) {
+                final PairwiseDomainSimilarity pairwise_similarity = pairwise_calculator
+                        .calculateSimilarity( domains_i, domains_list.get( j ) );
+                final int difference_in_counts = pairwise_similarity.getDifferenceInCounts();
+                int difference = 0;
+                if ( is_domain_combination_based ) {
+                    difference = ( ( CombinationsBasedPairwiseDomainSimilarity ) pairwise_similarity )
+                            .getNumberOfDifferentDomains();
+                }
+                else {
+                    difference = difference_in_counts;
+                }
+                if ( Math.abs( difference_in_counts ) > Math.abs( max_difference_in_counts ) ) {
+                    max_difference_in_counts = difference_in_counts;
+                }
+                if ( Math.abs( difference ) > Math.abs( max_difference ) ) {
+                    max_difference = difference;
+                }
+                stat.addValue( pairwise_similarity.getSimilarityScore() );
+            }
+        }
+        if ( stat.getN() < 1 ) {
+            throw new AssertionError( "empty descriptive statistics: this should not have happened" );
+        }
+        if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
+            throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+        }
+        if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) {
+            max_difference_in_counts = Math.abs( max_difference_in_counts );
+            if ( !is_domain_combination_based ) {
+                max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based.
+            }
+        }
+        DomainSimilarity similarity = null;
+        if ( stat.getN() == 1 ) {
+            similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                        stat.getMin(),
+                                                        stat.getMax(),
+                                                        stat.arithmeticMean(),
+                                                        stat.median(),
+                                                        0.0,
+                                                        stat.getN(),
+                                                        max_difference_in_counts,
+                                                        max_difference,
+                                                        species_data,
+                                                        getSort(),
+                                                        isSortBySpeciesCountFirst(),
+                                                        isTreatAsBinaryComparison() );
+        }
+        else {
+            similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                        stat.getMin(),
+                                                        stat.getMax(),
+                                                        stat.arithmeticMean(),
+                                                        stat.median(),
+                                                        stat.sampleStandardDeviation(),
+                                                        stat.getN(),
+                                                        max_difference_in_counts,
+                                                        max_difference,
+                                                        species_data,
+                                                        getSort(),
+                                                        isSortBySpeciesCountFirst(),
+                                                        isTreatAsBinaryComparison() );
+        }
+        return similarity;
+    }
+
+    private DomainSimilarity.DomainSimilaritySortField getSort() {
+        return _sort;
+    }
+
+    private boolean isSortBySpeciesCountFirst() {
+        return _sort_by_species_count_first;
+    }
+
+    private boolean isTreatAsBinaryComparison() {
+        return _treat_as_binary_comparison;
+    }
+
+    private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) {
+        final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd
+                .getKeyDomainProteinsCount(), cd.getKeyDomainCount(), cd.getNumberOfCombinableDomains(), cd
+                .getKeyDomainConfidenceDescriptiveStatistics() );
+        for( final DomainId domain : cd.getCombinableDomains() ) {
+            sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) );
+        }
+        return sd;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java b/forester/java/src/org/forester/surfacing/BasicGenomeWideCombinableDomains.java
new file mode 100644 (file)
index 0000000..ca8bff0
--- /dev/null
@@ -0,0 +1,365 @@
+
+package org.forester.surfacing;
+
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.forester.go.GoId;
+import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+public class BasicGenomeWideCombinableDomains implements GenomeWideCombinableDomains {
+
+    private final static NumberFormat                    FORMATTER                                  = new DecimalFormat( "0.0E0" );
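+    // Comparators sorting combinable domains by descending key domain count, key domain proteins
+    // count, and combinations count, respectively; ties are broken by key domain id.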
+    private static final Comparator<CombinableDomains>   DESCENDING_KEY_DOMAIN_COUNT_ORDER          = new Comparator<CombinableDomains>() {
+
+                                                                                                        public int compare( final CombinableDomains d1,
+                                                                                                                            final CombinableDomains d2 ) {
+                                                                                                            if ( d1
+                                                                                                                    .getKeyDomainCount() < d2
+                                                                                                                    .getKeyDomainCount() ) {
+                                                                                                                return 1;
+                                                                                                            }
+                                                                                                            else if ( d1
+                                                                                                                    .getKeyDomainCount() > d2
+                                                                                                                    .getKeyDomainCount() ) {
+                                                                                                                return -1;
+                                                                                                            }
+                                                                                                            else {
+                                                                                                                return d1
+                                                                                                                        .getKeyDomain()
+                                                                                                                        .getId()
+                                                                                                                        .compareTo( d2
+                                                                                                                                .getKeyDomain()
+                                                                                                                                .getId() );
+                                                                                                            }
+                                                                                                        }
+                                                                                                    };
+    private static final Comparator<CombinableDomains>   DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER = new Comparator<CombinableDomains>() {
+
+                                                                                                        public int compare( final CombinableDomains d1,
+                                                                                                                            final CombinableDomains d2 ) {
+                                                                                                            if ( d1
+                                                                                                                    .getKeyDomainProteinsCount() < d2
+                                                                                                                    .getKeyDomainProteinsCount() ) {
+                                                                                                                return 1;
+                                                                                                            }
+                                                                                                            else if ( d1
+                                                                                                                    .getKeyDomainProteinsCount() > d2
+                                                                                                                    .getKeyDomainProteinsCount() ) {
+                                                                                                                return -1;
+                                                                                                            }
+                                                                                                            else {
+                                                                                                                return d1
+                                                                                                                        .getKeyDomain()
+                                                                                                                        .getId()
+                                                                                                                        .compareTo( d2
+                                                                                                                                .getKeyDomain()
+                                                                                                                                .getId() );
+                                                                                                            }
+                                                                                                        }
+                                                                                                    };
+    private static final Comparator<CombinableDomains>   DESCENDING_COMBINATIONS_COUNT_ORDER        = new Comparator<CombinableDomains>() {
+
+                                                                                                        public int compare( final CombinableDomains d1,
+                                                                                                                            final CombinableDomains d2 ) {
+                                                                                                            if ( d1
+                                                                                                                    .getNumberOfCombinableDomains() < d2
+                                                                                                                    .getNumberOfCombinableDomains() ) {
+                                                                                                                return 1;
+                                                                                                            }
+                                                                                                            else if ( d1
+                                                                                                                    .getNumberOfCombinableDomains() > d2
+                                                                                                                    .getNumberOfCombinableDomains() ) {
+                                                                                                                return -1;
+                                                                                                            }
+                                                                                                            else {
+                                                                                                                return d1
+                                                                                                                        .getKeyDomain()
+                                                                                                                        .getId()
+                                                                                                                        .compareTo( d2
+                                                                                                                                .getKeyDomain()
+                                                                                                                                .getId() );
+                                                                                                            }
+                                                                                                        }
+                                                                                                    };
+    final private SortedMap<DomainId, CombinableDomains> _combinable_domains_map;
+    final private Species                                _species;
+    final private DomainCombinationType                  _dc_type;
+
+    private BasicGenomeWideCombinableDomains( final Species species, final DomainCombinationType dc_type ) {
+        _combinable_domains_map = new TreeMap<DomainId, CombinableDomains>();
+        _species = species;
+        _dc_type = dc_type;
+    }
+
+    private void add( final DomainId key, final CombinableDomains cdc ) {
+        _combinable_domains_map.put( key, cdc );
+    }
+
+    public boolean contains( final DomainId key_id ) {
+        return _combinable_domains_map.containsKey( key_id );
+    }
+
+    public CombinableDomains get( final DomainId key_id ) {
+        return _combinable_domains_map.get( key_id );
+    }
+
+    public SortedMap<DomainId, CombinableDomains> getAllCombinableDomainsIds() {
+        return _combinable_domains_map;
+    }
+
+    @Override
+    public SortedSet<DomainId> getAllDomainIds() {
+        final SortedSet<DomainId> domains = new TreeSet<DomainId>();
+        for( final DomainId key : getAllCombinableDomainsIds().keySet() ) {
+            final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
+            final List<DomainId> ds = cb.getAllDomains();
+            for( final DomainId d : ds ) {
+                domains.add( d );
+            }
+        }
+        return domains;
+    }
+
+    @Override
+    public DomainCombinationType getDomainCombinationType() {
+        return _dc_type;
+    }
+
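+    // Returns the domain id(s) whose number of combinable domains equals the per-genome maximum.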
+    @Override
+    public SortedSet<DomainId> getMostPromiscuosDomain() {
+        final SortedSet<DomainId> doms = new TreeSet<DomainId>();
+        final int max = ( int ) getPerGenomeDomainPromiscuityStatistics().getMax();
+        for( final DomainId key : getAllCombinableDomainsIds().keySet() ) {
+            final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
+            if ( cb.getNumberOfCombinableDomains() == max ) {
+                doms.add( key );
+            }
+        }
+        return doms;
+    }
+
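+    // Descriptive statistics over the number of combinable domains per key domain in this genome.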
+    @Override
+    public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics() {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final DomainId key : getAllCombinableDomainsIds().keySet() ) {
+            final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
+            stats.addValue( cb.getNumberOfCombinableDomains() );
+        }
+        return stats;
+    }
+
+    public int getSize() {
+        return _combinable_domains_map.size();
+    }
+
+    public Species getSpecies() {
+        return _species;
+    }
+
+    @Override
+    public SortedSet<BinaryDomainCombination> toBinaryDomainCombinations() {
+        final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
+        for( final DomainId key : getAllCombinableDomainsIds().keySet() ) {
+            final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
+            for( final BinaryDomainCombination b : cb.toBinaryDomainCombinations() ) {
+                binary_combinations.add( b );
+            }
+        }
+        return binary_combinations;
+    }
+
+    @Override
+    public String toString() {
+        return toStringBuilder( GenomeWideCombinableDomainsSortOrder.ALPHABETICAL_KEY_ID ).toString();
+    }
+
+    // Produces something like: 
+    // 2-oxoacid_dh      5       5       2       4.8E-67   Biotin_lipoyl [4], E3_binding [3]
+    public StringBuilder toStringBuilder( final GenomeWideCombinableDomainsSortOrder sort_order ) {
+        final StringBuilder sb = new StringBuilder();
+        final List<CombinableDomains> combinable_domains = new ArrayList<CombinableDomains>();
+        for( final DomainId key : getAllCombinableDomainsIds().keySet() ) {
+            final CombinableDomains cb = getAllCombinableDomainsIds().get( key );
+            combinable_domains.add( cb );
+        }
+        if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_COUNT ) {
+            Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_COUNT_ORDER );
+        }
+        else if ( sort_order == GenomeWideCombinableDomainsSortOrder.KEY_DOMAIN_PROTEINS_COUNT ) {
+            Collections.sort( combinable_domains,
+                              BasicGenomeWideCombinableDomains.DESCENDING_KEY_DOMAIN_PROTEINS_COUNT_ORDER );
+        }
+        else if ( sort_order == GenomeWideCombinableDomainsSortOrder.COMBINATIONS_COUNT ) {
+            Collections.sort( combinable_domains, BasicGenomeWideCombinableDomains.DESCENDING_COMBINATIONS_COUNT_ORDER );
+        }
+        for( final CombinableDomains cb : combinable_domains ) {
+            sb.append( ForesterUtil.pad( new StringBuffer( cb.getKeyDomain().toString() ), 18, ' ', false ) );
+            sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainCount() ), 8, ' ', false ) );
+            sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getKeyDomainProteinsCount() ), 8, ' ', false ) );
+            sb.append( ForesterUtil.pad( new StringBuffer( "" + cb.getNumberOfCombinableDomains() ), 8, ' ', false ) );
+            sb
+                    .append( ForesterUtil
+                            .pad( new StringBuffer( ""
+                                          + FORMATTER
+                                                  .format( cb.getKeyDomainConfidenceDescriptiveStatistics().median() ) ),
+                                  10,
+                                  ' ',
+                                  false ) );
+            sb.append( cb.getCombiningDomainIdsAsStringBuilder() );
+            sb.append( ForesterUtil.getLineSeparator() );
+        }
+        return sb;
+    }
+
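+    // Updates the occurrence count for id_i, its per-protein count (counted at most once per
+    // protein via saw_c), and the per-domain confidence statistics with the given support value.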
+    private static void countDomains( final Map<DomainId, Integer> domain_counts,
+                                      final Map<DomainId, Integer> domain_protein_counts,
+                                      final Map<DomainId, DescriptiveStatistics> stats,
+                                      final Set<DomainId> saw_c,
+                                      final DomainId id_i,
+                                      final double support ) {
+        if ( domain_counts.containsKey( id_i ) ) {
+            domain_counts.put( id_i, 1 + domain_counts.get( ( id_i ) ) );
+            if ( !saw_c.contains( id_i ) ) {
+                domain_protein_counts.put( id_i, 1 + domain_protein_counts.get( ( id_i ) ) );
+            }
+        }
+        else {
+            stats.put( id_i, new BasicDescriptiveStatistics() );
+            domain_counts.put( id_i, 1 );
+            domain_protein_counts.put( id_i, 1 );
+        }
+        stats.get( id_i ).addValue( support );
+        saw_c.add( id_i );
+    }
+
+    public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
+                                                                   final boolean ignore_combination_with_same_domain,
+                                                                   final Species species ) {
+        return createInstance( protein_list,
+                               ignore_combination_with_same_domain,
+                               species,
+                               null,
+                               DomainCombinationType.BASIC );
+    }
+
+    public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
+                                                                   final boolean ignore_combination_with_same_domain,
+                                                                   final Species species,
+                                                                   final DomainCombinationType dc_type ) {
+        return createInstance( protein_list, ignore_combination_with_same_domain, species, null, dc_type );
+    }
+
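+    // Builds the genome-wide combinable domains for the given proteins: counts key domain and
+    // per-protein occurrences, records which domains combine with each key domain according to the
+    // domain combination type (basic, directed, or directed adjacent), and attaches GO ids to key
+    // domains if a mapping is provided.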
+    public static BasicGenomeWideCombinableDomains createInstance( final List<Protein> protein_list,
+                                                                   final boolean ignore_combination_with_same_domain,
+                                                                   final Species species,
+                                                                   final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                                   final DomainCombinationType dc_type ) {
+        final BasicGenomeWideCombinableDomains instance = new BasicGenomeWideCombinableDomains( species, dc_type );
+        final Map<DomainId, Integer> domain_counts = new HashMap<DomainId, Integer>();
+        final Map<DomainId, Integer> domain_protein_counts = new HashMap<DomainId, Integer>();
+        final Map<DomainId, DescriptiveStatistics> stats = new HashMap<DomainId, DescriptiveStatistics>();
+        for( final Protein protein : protein_list ) {
+            if ( !protein.getSpecies().equals( species ) ) {
+                throw new IllegalArgumentException( "species (" + protein.getSpecies()
+                        + ") does not match species of combinable domains collection (" + species + ")" );
+            }
+            final Set<DomainId> saw_i = new HashSet<DomainId>();
+            final Set<DomainId> saw_c = new HashSet<DomainId>();
+            for( int i = 0; i < protein.getProteinDomains().size(); ++i ) {
+                final Domain pd_i = protein.getProteinDomain( i );
+                final DomainId id_i = pd_i.getDomainId();
+                final int current_start = pd_i.getFrom();
+                BasicGenomeWideCombinableDomains.countDomains( domain_counts,
+                                                               domain_protein_counts,
+                                                               stats,
+                                                               saw_c,
+                                                               id_i,
+                                                               pd_i.getPerSequenceEvalue() );
+                if ( !saw_i.contains( id_i ) ) {
+                    if ( dc_type == DomainCombinationType.BASIC ) {
+                        saw_i.add( id_i );
+                    }
+                    CombinableDomains domain_combination = null;
+                    if ( instance.contains( id_i ) ) {
+                        domain_combination = instance.get( id_i );
+                    }
+                    else {
+                        if ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) {
+                            domain_combination = new AdjactantDirectedCombinableDomains( pd_i.getDomainId(), species );
+                        }
+                        else if ( dc_type == DomainCombinationType.DIRECTED ) {
+                            domain_combination = new DirectedCombinableDomains( pd_i.getDomainId(), species );
+                        }
+                        else {
+                            domain_combination = new BasicCombinableDomains( pd_i.getDomainId(), species );
+                        }
+                        if ( ( domain_id_to_go_ids_map != null )
+                                && domain_id_to_go_ids_map.containsKey( pd_i.getDomainId() ) ) {
+                            final List<GoId> go_ids = domain_id_to_go_ids_map.get( pd_i.getDomainId() );
+                            for( final GoId go_id : go_ids ) {
+                                domain_combination.getKeyDomain().addGoId( go_id );
+                            }
+                        }
+                        instance.add( id_i, domain_combination );
+                    }
+                    final Set<DomainId> saw_j = new HashSet<DomainId>();
+                    if ( ignore_combination_with_same_domain ) {
+                        saw_j.add( id_i );
+                    }
+                    Domain closest = null;
+                    for( int j = 0; j < protein.getNumberOfProteinDomains(); ++j ) {
+                        if ( ( dc_type != DomainCombinationType.BASIC )
+                                && ( current_start >= protein.getProteinDomain( j ).getFrom() ) ) {
+                            continue;
+                        }
+                        if ( i != j ) {
+                            final DomainId id = protein.getProteinDomain( j ).getDomainId();
+                            if ( !saw_j.contains( id ) ) {
+                                saw_j.add( id );
+                                if ( dc_type != DomainCombinationType.DIRECTED_ADJACTANT ) {
+                                    domain_combination
+                                            .addCombinableDomain( protein.getProteinDomain( j ).getDomainId() );
+                                }
+                                else {
+                                    if ( closest == null ) {
+                                        closest = protein.getProteinDomain( j );
+                                    }
+                                    else {
+                                        if ( protein.getProteinDomain( j ).getFrom() < closest.getFrom() ) {
+                                            closest = protein.getProteinDomain( j );
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    if ( ( dc_type == DomainCombinationType.DIRECTED_ADJACTANT ) && ( closest != null ) ) {
+                        domain_combination.addCombinableDomain( closest.getDomainId() );
+                    }
+                }
+            }
+        }
+        for( final DomainId key_id : domain_counts.keySet() ) {
+            instance.get( key_id ).setKeyDomainCount( domain_counts.get( key_id ) );
+            instance.get( key_id ).setKeyDomainProteinsCount( domain_protein_counts.get( key_id ) );
+            instance.get( key_id ).setKeyDomainConfidenceDescriptiveStatistics( stats.get( key_id ) );
+        }
+        return instance;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicProtein.java b/forester/java/src/org/forester/surfacing/BasicProtein.java
new file mode 100644 (file)
index 0000000..bc67c18
--- /dev/null
@@ -0,0 +1,175 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+public class BasicProtein implements Protein {
+
+    private final ProteinId    _id;
+    private final Species      _species;
+    private String             _name;
+    private String             _desc;
+    private String             _accession;
+    private final List<Domain> _protein_domains;
+
+    public BasicProtein( final String id_str, final String species_str ) {
+        _id = new ProteinId( id_str );
+        _species = new BasicSpecies( species_str );
+        _protein_domains = new ArrayList<Domain>();
+        init();
+    }
+
+    public void addProteinDomain( final Domain protein_domain ) {
+        getProteinDomains().add( protein_domain );
+    }
+
+    @Override
+    /**
+     * If in_nc_order is set to true, this returns true if and only if
+     * the order of the domains in 'query_domain_ids' and in this protein (as determined
+     * by the start positions of this protein's domains, _not_ by their index) is the same
+     * (interspersed 'other' domains in this protein are ignored).
+     * If in_nc_order is set to false, this returns true if and only if
+     * this protein contains all domains listed in 'query_domain_ids' (order and count do not matter).
+     *
+     * @param query_domain_ids a list of domain ids in a certain order
+     * @param in_nc_order whether to consider the order of the domains
+     * @return true if this protein contains the query domain ids as described above
+     */
+    public boolean contains( final List<DomainId> query_domain_ids, final boolean in_nc_order ) {
+        if ( !in_nc_order ) {
+            for( final DomainId query_domain_id : query_domain_ids ) {
+                if ( !getProteinDomainIds().contains( query_domain_id ) ) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        else {
+            int current_start_position = -1;
+            I: for( final DomainId query_domain_id : query_domain_ids ) {
+                if ( getProteinDomainIds().contains( query_domain_id ) ) {
+                    final List<Domain> found_domains = getProteinDomains( query_domain_id );
+                    final SortedSet<Integer> ordered_start_positions = new TreeSet<Integer>();
+                    for( final Domain found_domain : found_domains ) {
+                        ordered_start_positions.add( found_domain.getFrom() );
+                    }
+                    for( final int start_position : ordered_start_positions ) {
+                        if ( start_position > current_start_position ) {
+                            current_start_position = start_position;
+                            continue I;
+                        }
+                    }
+                    return false;
+                }
+                else {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    @Override
+    public String getAccession() {
+        return _accession;
+    }
+
+    @Override
+    public String getDescription() {
+        return _desc;
+    }
+
+    @Override
+    public String getName() {
+        return _name;
+    }
+
+    public int getNumberOfProteinDomains() {
+        return getProteinDomains().size();
+    }
+
+    public Domain getProteinDomain( final int index ) {
+        return _protein_domains.get( index );
+    }
+
+    public int getProteinDomainCount( final DomainId domain_id ) {
+        return getProteinDomains( domain_id ).size();
+    }
+
+    private List<DomainId> getProteinDomainIds() {
+        final List<DomainId> ids = new ArrayList<DomainId>( getProteinDomains().size() );
+        for( final Domain domain : getProteinDomains() ) {
+            ids.add( domain.getDomainId() );
+        }
+        return ids;
+    }
+
+    public List<Domain> getProteinDomains() {
+        return _protein_domains;
+    }
+
+    public List<Domain> getProteinDomains( final DomainId domain_id ) {
+        final List<Domain> domains = new ArrayList<Domain>();
+        for( final Domain domain : getProteinDomains() ) {
+            if ( domain.getDomainId().equals( domain_id ) ) {
+                domains.add( domain );
+            }
+        }
+        return domains;
+    }
+
+    public ProteinId getProteinId() {
+        return _id;
+    }
+
+    public Species getSpecies() {
+        return _species;
+    }
+
+    private void init() {
+        _desc = "";
+        _accession = "";
+        _name = "";
+    }
+
+    public void setAccession( final String accession ) {
+        _accession = accession;
+    }
+
+    public void setDescription( final String description ) {
+        _desc = description;
+    }
+
+    public void setName( final String name ) {
+        _name = name;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BasicSpecies.java b/forester/java/src/org/forester/surfacing/BasicSpecies.java
new file mode 100644 (file)
index 0000000..e425b39
--- /dev/null
@@ -0,0 +1,83 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.util.ForesterUtil;
+
+public class BasicSpecies implements Species {
+
+    final private String _species_id;
+
+    public BasicSpecies( final String species_id ) {
+        if ( ForesterUtil.isEmpty( species_id ) ) {
+            throw new IllegalArgumentException( "attempt to create new species from empty or null string" );
+        }
+        _species_id = species_id.trim();
+    }
+
+    @Override
+    public int compareTo( final Species species ) {
+        if ( this == species ) {
+            return 0;
+        }
+        return getSpeciesId().toLowerCase().compareTo( species.getSpeciesId().toLowerCase() );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return getSpeciesId().equals( ( ( Species ) o ).getSpeciesId() );
+        }
+    }
+
+    /**
+     * @see org.forester.surfacing.Species#getSpeciesId()
+     */
+    @Override
+    public String getSpeciesId() {
+        return _species_id;
+    }
+
+    @Override
+    public int hashCode() {
+        return getSpeciesId().hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return getSpeciesId();
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/BinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/BinaryDomainCombination.java
new file mode 100644 (file)
index 0000000..3637dfe
--- /dev/null
@@ -0,0 +1,56 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public interface BinaryDomainCombination extends Comparable<BinaryDomainCombination> {
+
+    public static final String SEPARATOR = "=";
+
+    public DomainId getId0();
+
+    public DomainId getId1();
+
+    public abstract StringBuffer toGraphDescribingLanguage( final OutputFormat format,
+                                                            final String node_attribute,
+                                                            String edge_attribute );
+
+    /**
+     * This has to return a String representation
+     * in the following format:
+     * id0 - id1
+     * 
+     * @return a String representation in the form id0 - id1
+     */
+    public String toString();
+
+    public static enum DomainCombinationType {
+        BASIC, DIRECTED, DIRECTED_ADJACTANT;
+    }
+
+    public static enum OutputFormat {
+        DOT
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/CombinableDomains.java b/forester/java/src/org/forester/surfacing/CombinableDomains.java
new file mode 100644 (file)
index 0000000..05fffbf
--- /dev/null
@@ -0,0 +1,138 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.List;
+import java.util.SortedMap;
+
+import org.forester.util.DescriptiveStatistics;
+
+public interface CombinableDomains {
+
+    /**
+     * Adds a new combinable domain.
+     * 
+     * @param protein_domain the identifier of the domain to add
+     */
+    public void addCombinableDomain( final DomainId protein_domain );
+
+    /**
+     * 
+     * This must return all domains in this set of combinable domains (i.e.
+     * the key domain and all domains which can combine with the key domain).
+     * 
+     *  @return all domains
+     */
+    List<DomainId> getAllDomains();
+
+    List<DomainId> getCombinableDomains();
+
+    /**
+     * Returns the combinable domain identifiers sorted alphabetically:
+     * keys are the combinable domain identifiers,
+     * values are the counts of proteins exhibiting a particular combination.
+     * 
+     * @return combinable domain identifiers (keys) mapped to counts of proteins
+     *         exhibiting the combination (values), sorted alphabetically by identifier
+     */
+    public SortedMap<DomainId, Integer> getCombinableDomainsIds();
+
+    public StringBuilder getCombiningDomainIdsAsStringBuilder();
+
+    /**
+     * Returns the key domain, i.e. the domain whose combinable domains are stored in this
+     * collection of combinable domains.
+     * 
+     * @return the key domain identifier
+     */
+    public DomainId getKeyDomain();
+
+    /**
+     * Gets descriptive statistics for the confidence (i.e. E-values) of the key
+     * domain.
+     * 
+     * 
+     * @return descriptive statistics for the confidence of the key domain
+     */
+    public DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics();
+
+    /**
+     * Returns how many times the key domain is present in a given species
+     * genome.
+     * 
+     * @return key domain count in species
+     */
+    public int getKeyDomainCount();
+
+    /**
+     * Returns how many proteins with the key domain are present in a given
+     * species genome.
+     * 
+     * @return key domain proteins count in species
+     */
+    public int getKeyDomainProteinsCount();
+
+    public int getNumberOfCombinableDomains();
+
+    public int getNumberOfProteinsExhibitingCombination( final DomainId protein_domain );
+
+    /**
+     * Returns the species of this combinable domains.
+     * 
+     * @return the species
+     */
+    public Species getSpecies();
+
+    public boolean isCombinable( final DomainId protein_domain );
+
+    /**
+     * Sets descriptive statistics for the confidence (i.e. E-values)
+     * of the key domain.
+     * 
+     * @param statistics descriptive statistics for the confidence of the key domain
+     */
+    void setKeyDomainConfidenceDescriptiveStatistics( final DescriptiveStatistics statistics );
+
+    /**
+     * Sets how many times the key domain is present in a given species genome.
+     * 
+     * @param key_domain_count
+     *            key domain count in species
+     */
+    void setKeyDomainCount( final int key_domain_count );
+
+    /**
+     * Sets how many proteins with the key domain are present in a given species
+     * genome.
+     * 
+     * @param key_domain_proteins_count
+     *            key domain protein count in species
+     */
+    void setKeyDomainProteinsCount( final int key_domain_proteins_count );
+
+    public List<BinaryDomainCombination> toBinaryDomainCombinations();
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarity.java b/forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarity.java
new file mode 100644 (file)
index 0000000..b3035c5
--- /dev/null
@@ -0,0 +1,70 @@
+// $Id:
+// cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public class CombinationsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity {
+
+    private final int    _same_domains;
+    private final int    _different_domains;
+    private final int    _difference_in_counts;
+    private final double _score;
+
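+    /**
+     * Similarity based on combinable domains: the score is
+     * same_domains / ( same_domains + different_domains ), or 1.0 if there
+     * are no different domains. (Illustrative example with hypothetical
+     * counts: same_domains = 3 and different_domains = 1 give 3 / 4 = 0.75.)
+     */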
+    public CombinationsBasedPairwiseDomainSimilarity( final int same_domains,
+                                                      final int different_domains,
+                                                      final int difference_in_counts ) {
+        if ( ( same_domains < 0 ) || ( different_domains < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to use domain counts less than 0" );
+        }
+        _difference_in_counts = difference_in_counts;
+        _same_domains = same_domains;
+        _different_domains = different_domains;
+        if ( _different_domains == 0 ) {
+            _score = 1.0;
+        }
+        else {
+            _score = ( double ) _same_domains / ( _different_domains + _same_domains );
+        }
+    }
+
+    @Override
+    public int getDifferenceInCounts() {
+        return _difference_in_counts;
+    }
+
+    public int getNumberOfDifferentDomains() {
+        return _different_domains;
+    }
+
+    public int getNumberOfSameDomains() {
+        return _same_domains;
+    }
+
+    public double getSimilarityScore() {
+        return _score;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/CombinationsBasedPairwiseDomainSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..a33527d
--- /dev/null
@@ -0,0 +1,59 @@
+// $Id:
+// 22:43:35 cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.List;
+
+public class CombinationsBasedPairwiseDomainSimilarityCalculator implements PairwiseDomainSimilarityCalculator {
+
+    public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1,
+                                                         final CombinableDomains domains_2 ) {
+        if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
+            throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
+        }
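+        // Count combinable domain ids shared by both collections ("same") and those
+        // present in only one of them ("different").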
+        final List<DomainId> d1 = domains_1.getCombinableDomains();
+        final List<DomainId> d2 = domains_2.getCombinableDomains();
+        int same = 0;
+        int different = 0;
+        for( final DomainId domain : d1 ) {
+            if ( d2.contains( domain ) ) {
+                same++;
+            }
+            else {
+                different++;
+            }
+        }
+        for( final DomainId domain : d2 ) {
+            if ( !( d1.contains( domain ) ) ) {
+                different++;
+            }
+        }
+        final int difference = domains_1.getNumberOfCombinableDomains() - domains_2.getNumberOfCombinableDomains();
+        return new CombinationsBasedPairwiseDomainSimilarity( same, different, difference );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/CountsBasedPairwiseDomainSimilarity.java b/forester/java/src/org/forester/surfacing/CountsBasedPairwiseDomainSimilarity.java
new file mode 100644 (file)
index 0000000..b1abfeb
--- /dev/null
@@ -0,0 +1,65 @@
+// $Id:
+// cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public class CountsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity {
+
+    private final double _score;
+    private final int    _copy_number_difference;
+
+    /**
+     * counts_difference: (counts for domain 1) minus (counts for domain 2).
+     * 
+     * @param counts_difference value of domain_1 minus value of domain_2
+     * @param counts_sum sum of the counts for domain 1 and domain 2 (must be greater than 0)
+     */
+    public CountsBasedPairwiseDomainSimilarity( final int counts_difference, final int counts_sum ) {
+        if ( counts_sum <= 0 ) {
+            throw new IllegalArgumentException( "attempt to use copy sum of less than or equal to 0: " + counts_sum );
+        }
+        _copy_number_difference = counts_difference;
+        final int abs_copy_number_difference = Math.abs( counts_difference );
+        if ( abs_copy_number_difference > counts_sum ) {
+            throw new IllegalArgumentException( "attempt to use absolute copy number difference larger than copy number sum" );
+        }
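+        // Score: 1.0 minus the relative copy number difference. Illustrative example with
+        // hypothetical counts: a difference of -2 with a sum of 10 gives 1.0 - 2/10 = 0.8.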
+        _score = 1.0 - ( double ) abs_copy_number_difference / counts_sum;
+    }
+
+    /**
+     * Returns (counts for domain 1) minus (counts for domain 2).
+     * 
+     */
+    public int getDifferenceInCounts() {
+        return _copy_number_difference;
+    }
+
+    public double getSimilarityScore() {
+        return _score;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java
new file mode 100644 (file)
index 0000000..4fa9179
--- /dev/null
@@ -0,0 +1,54 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public class DirectedBinaryDomainCombination extends BasicBinaryDomainCombination {
+
+    public DirectedBinaryDomainCombination( final DomainId n_terminal, final DomainId c_terminal ) {
+        super();
+        if ( ( n_terminal == null ) || ( c_terminal == null ) ) {
+            throw new IllegalArgumentException( "attempt to create binary domain combination using null" );
+        }
+        _id_0 = n_terminal;
+        _id_1 = c_terminal;
+    }
+
+    public DirectedBinaryDomainCombination( final String n_terminal, final String c_terminal ) {
+        this( new DomainId( n_terminal ), new DomainId( c_terminal ) );
+    }
+
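+    /**
+     * Creates a directed binary domain combination from a string of two domain ids
+     * joined by BinaryDomainCombination.SEPARATOR, e.g. (hypothetical Pfam-style ids)
+     * "PF00001=PF00002"; the first id becomes the N-terminal, the second the
+     * C-terminal domain.
+     */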
+    public static BinaryDomainCombination createInstance( final String ids ) {
+        if ( ids.indexOf( BinaryDomainCombination.SEPARATOR ) < 1 ) {
+            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
+        }
+        final String[] ids_ary = ids.split( BinaryDomainCombination.SEPARATOR );
+        if ( ids_ary.length != 2 ) {
+            throw new IllegalArgumentException( "Unexpected format for binary domain combination [" + ids + "]" );
+        }
+        return new DirectedBinaryDomainCombination( ids_ary[ 0 ], ids_ary[ 1 ] );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DirectedCombinableDomains.java b/forester/java/src/org/forester/surfacing/DirectedCombinableDomains.java
new file mode 100644 (file)
index 0000000..2103d26
--- /dev/null
@@ -0,0 +1,48 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DirectedCombinableDomains extends BasicCombinableDomains {
+
+    public DirectedCombinableDomains( final DomainId n_terminal_key_domain, final Species species ) {
+        super( n_terminal_key_domain, species );
+    }
+
+    @Override
+    public List<BinaryDomainCombination> toBinaryDomainCombinations() {
+        final List<BinaryDomainCombination> binary_combinations = new ArrayList<BinaryDomainCombination>( getNumberOfCombinableDomains() );
+        for( final DomainId domain : getCombiningDomains().keySet() ) {
+            // Precondition (!): key domain is most upstream domain.
+            //TODO ensure this is true.
+            binary_combinations.add( new DirectedBinaryDomainCombination( getKeyDomain(), domain ) );
+        }
+        return binary_combinations;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/Domain.java b/forester/java/src/org/forester/surfacing/Domain.java
new file mode 100644 (file)
index 0000000..c2baf20
--- /dev/null
@@ -0,0 +1,56 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.go.GoId;
+
+public interface Domain extends Comparable<Domain> {
+
+    public void addGoId( GoId go_id );
+
+    public DomainId getDomainId();
+
+    public int getFrom();
+
+    public GoId getGoId( int i );
+
+    public short getNumber();
+
+    public int getNumberOfGoIds();
+
+    public double getPerDomainEvalue();
+
+    public double getPerDomainScore();
+
+    public double getPerSequenceEvalue();
+
+    public double getPerSequenceScore();
+
+    public int getTo();
+
+    public short getTotalCount();
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/DomainArchitectureBasedGenomeSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/DomainArchitectureBasedGenomeSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..317bec3
--- /dev/null
@@ -0,0 +1,333 @@
+// $Id:
+// 19:38:35 cmzmasek Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class DomainArchitectureBasedGenomeSimilarityCalculator {
+
+    public static final double                MAX_SIMILARITY_SCORE = 1.0;
+    public static final double                MIN_SIMILARITY_SCORE = 0.0;
+    final private GenomeWideCombinableDomains _combinable_domains_genome_0;
+    final private GenomeWideCombinableDomains _combinable_domains_genome_1;
+    private Set<DomainId>                     _domain_ids_to_ignore;
+    private boolean                           _allow_domains_to_be_ignored;
+    private Set<DomainId>                     _all_domains;
+    private Set<DomainId>                     _shared_domains;
+    private Set<DomainId>                     _domains_specific_to_0;
+    private Set<DomainId>                     _domains_specific_to_1;
+    private Set<BinaryDomainCombination>      _all_binary_domain_combinations;
+    private Set<BinaryDomainCombination>      _shared_binary_domain_combinations;
+    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_0;
+    private Set<BinaryDomainCombination>      _binary_domain_combinations_specific_to_1;
+
+    public DomainArchitectureBasedGenomeSimilarityCalculator( final GenomeWideCombinableDomains combinable_domains_genome_0,
+                                                              final GenomeWideCombinableDomains combinable_domains_genome_1 ) {
+        if ( ( combinable_domains_genome_0 == null ) || ( combinable_domains_genome_0.getSize() < 1 )
+                || ( combinable_domains_genome_1 == null ) || ( combinable_domains_genome_1.getSize() < 1 ) ) {
+            throw new IllegalArgumentException( "attempt to compare null or empty combinable domains collection" );
+        }
+        if ( combinable_domains_genome_0.getSpecies().equals( combinable_domains_genome_1.getSpecies() ) ) {
+            throw new IllegalArgumentException( "attempt to compare combinable domains collection from the same species" );
+        }
+        _combinable_domains_genome_0 = combinable_domains_genome_0;
+        _combinable_domains_genome_1 = combinable_domains_genome_1;
+        init();
+        forceRecalculation();
+    }
+
+    public void addDomainIdToIgnore( final DomainId domain_id_to_ignore ) {
+        forceRecalculation();
+        getDomainIdsToIgnore().add( domain_id_to_ignore );
+    }
+
+    /**
+     * This returns a score between 0.0 (no binary domain combinations in common)
+     * and 1.0 (all binary domain combinations in common) measuring the similarity between two
+     * genomes based on the number of shared binary domain combinations:
+     * 
+     * t: number of distinct binary domain combinations present in either genome
+     * s: number of shared distinct binary domain combinations
+     *
+     * score = 1 - ( ( t - s ) / t )
+     * 
+     * @return shared binary domain combination based similarity score
+     */
+    public double calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() {
+        final double t = getAllBinaryDomainCombinations().size();
+        final double s = getSharedBinaryDomainCombinations().size();
+        if ( t == 0.0 ) {
+            return MIN_SIMILARITY_SCORE;
+        }
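+        // equivalently s / t; e.g. (hypothetical counts) t = 10 and s = 4 give 1.0 - (10 - 4)/10 = 0.4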
+        return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
+    }
+
+    /**
+     * This returns a score between 0.0 (no domains in common)
+     * and 1.0 (all domains in common) measuring the similarity between two
+     * genomes based on the number of shared domains:
+     * 
+     * t: number of distinct domains present in either genome
+     * s: number of shared distinct domains
+     *
+     * score = 1 - ( ( t - s ) / t )
+     * 
+     * @return shared domains based similarity score
+     */
+    public double calculateSharedDomainsBasedGenomeSimilarityScore() {
+        final double t = getAllDomains().size();
+        final double s = getSharedDomains().size();
+        if ( t == 0.0 ) {
+            return MIN_SIMILARITY_SCORE;
+        }
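+        // equivalently s / t, the fraction of all distinct domains that are shared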
+        return ( MAX_SIMILARITY_SCORE - ( ( t - s ) / t ) );
+    }
+
+    public void deleteAllDomainIdsToIgnore() {
+        forceRecalculation();
+        setDomainIdsToIgnore( new HashSet<DomainId>() );
+    }
+
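+    // Clears all lazily computed, cached sets so they are recomputed on next access.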
+    private void forceRecalculation() {
+        _all_domains = null;
+        _shared_domains = null;
+        _domains_specific_to_0 = null;
+        _domains_specific_to_1 = null;
+        _all_binary_domain_combinations = null;
+        _shared_binary_domain_combinations = null;
+        _binary_domain_combinations_specific_to_0 = null;
+        _binary_domain_combinations_specific_to_1 = null;
+    }
+
+    /**
+     * Returns all binary domain combinations of both genomes. Does not return binary
+     * combinations which contain one or two domains to be ignored -- if ignoring is allowed.
+     * 
+     * @return the set of all binary domain combinations
+     */
+    public Set<BinaryDomainCombination> getAllBinaryDomainCombinations() {
+        if ( _all_binary_domain_combinations == null ) {
+            final Set<BinaryDomainCombination> all = new HashSet<BinaryDomainCombination>();
+            all.addAll( getCombinableDomainsGenome0().toBinaryDomainCombinations() );
+            all.addAll( getCombinableDomainsGenome1().toBinaryDomainCombinations() );
+            if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+                _all_binary_domain_combinations = pruneBinaryCombinations( all );
+            }
+            else {
+                _all_binary_domain_combinations = all;
+            }
+        }
+        return _all_binary_domain_combinations;
+    }
+
+    /**
+     * Returns all domain ids of both genomes. Does not return domains which are to be
+     * ignored -- if ignoring is allowed.
+     * 
+     * @return the set of all domain ids
+     */
+    public Set<DomainId> getAllDomains() {
+        if ( _all_domains == null ) {
+            final Set<DomainId> all = new HashSet<DomainId>();
+            all.addAll( getCombinableDomainsGenome0().getAllDomainIds() );
+            all.addAll( getCombinableDomainsGenome1().getAllDomainIds() );
+            if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+                _all_domains = pruneDomains( all );
+            }
+            else {
+                _all_domains = all;
+            }
+        }
+        return _all_domains;
+    }
+
+    private Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome( final boolean specific_to_genome_0 ) {
+        final Set<BinaryDomainCombination> specific = new HashSet<BinaryDomainCombination>();
+        final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
+        final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
+        if ( specific_to_genome_0 ) {
+            for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
+                if ( !bc1.contains( binary_domain_combination0 ) ) {
+                    specific.add( binary_domain_combination0 );
+                }
+            }
+        }
+        else {
+            for( final BinaryDomainCombination binary_domain_combination1 : bc1 ) {
+                if ( !bc0.contains( binary_domain_combination1 ) ) {
+                    specific.add( binary_domain_combination1 );
+                }
+            }
+        }
+        if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+            return pruneBinaryCombinations( specific );
+        }
+        return specific;
+    }
+
+    public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome0() {
+        if ( _binary_domain_combinations_specific_to_0 == null ) {
+            _binary_domain_combinations_specific_to_0 = getBinaryDomainCombinationsSpecificToGenome( true );
+        }
+        return _binary_domain_combinations_specific_to_0;
+    }
+
+    public Set<BinaryDomainCombination> getBinaryDomainCombinationsSpecificToGenome1() {
+        if ( _binary_domain_combinations_specific_to_1 == null ) {
+            _binary_domain_combinations_specific_to_1 = getBinaryDomainCombinationsSpecificToGenome( false );
+        }
+        return _binary_domain_combinations_specific_to_1;
+    }
+
+    private GenomeWideCombinableDomains getCombinableDomainsGenome0() {
+        return _combinable_domains_genome_0;
+    }
+
+    private GenomeWideCombinableDomains getCombinableDomainsGenome1() {
+        return _combinable_domains_genome_1;
+    }
+
+    private Set<DomainId> getDomainIdsToIgnore() {
+        return _domain_ids_to_ignore;
+    }
+
+    private Set<DomainId> getDomainsSpecificToGenome( final boolean specific_to_genome_0 ) {
+        final Set<DomainId> specific = new HashSet<DomainId>();
+        final Set<DomainId> d0 = getCombinableDomainsGenome0().getAllDomainIds();
+        final Set<DomainId> d1 = getCombinableDomainsGenome1().getAllDomainIds();
+        if ( specific_to_genome_0 ) {
+            for( final DomainId domain0 : d0 ) {
+                if ( !d1.contains( domain0 ) ) {
+                    specific.add( domain0 );
+                }
+            }
+        }
+        else {
+            for( final DomainId domain1 : d1 ) {
+                if ( !d0.contains( domain1 ) ) {
+                    specific.add( domain1 );
+                }
+            }
+        }
+        if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+            return pruneDomains( specific );
+        }
+        return specific;
+    }
+
+    public Set<DomainId> getDomainsSpecificToGenome0() {
+        if ( _domains_specific_to_0 == null ) {
+            _domains_specific_to_0 = getDomainsSpecificToGenome( true );
+        }
+        return _domains_specific_to_0;
+    }
+
+    public Set<DomainId> getDomainsSpecificToGenome1() {
+        if ( _domains_specific_to_1 == null ) {
+            _domains_specific_to_1 = getDomainsSpecificToGenome( false );
+        }
+        return _domains_specific_to_1;
+    }
+
+    public Set<BinaryDomainCombination> getSharedBinaryDomainCombinations() {
+        if ( _shared_binary_domain_combinations == null ) {
+            final Set<BinaryDomainCombination> shared = new HashSet<BinaryDomainCombination>();
+            final Set<BinaryDomainCombination> bc0 = getCombinableDomainsGenome0().toBinaryDomainCombinations();
+            final Set<BinaryDomainCombination> bc1 = getCombinableDomainsGenome1().toBinaryDomainCombinations();
+            for( final BinaryDomainCombination binary_domain_combination0 : bc0 ) {
+                if ( bc1.contains( binary_domain_combination0 ) ) {
+                    shared.add( binary_domain_combination0 );
+                }
+            }
+            _shared_binary_domain_combinations = shared;
+            if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+                _shared_binary_domain_combinations = pruneBinaryCombinations( shared );
+            }
+        }
+        return _shared_binary_domain_combinations;
+    }
+
+    public Set<DomainId> getSharedDomains() {
+        if ( _shared_domains == null ) {
+            final Set<DomainId> shared = new HashSet<DomainId>();
+            final Set<DomainId> d0 = getCombinableDomainsGenome0().getAllDomainIds();
+            final Set<DomainId> d1 = getCombinableDomainsGenome1().getAllDomainIds();
+            for( final DomainId domain0 : d0 ) {
+                if ( d1.contains( domain0 ) ) {
+                    shared.add( domain0 );
+                }
+            }
+            _shared_domains = shared;
+            if ( isAllowDomainsToBeIgnored() && !getDomainIdsToIgnore().isEmpty() ) {
+                _shared_domains = pruneDomains( shared );
+            }
+        }
+        return _shared_domains;
+    }
+
+    private void init() {
+        deleteAllDomainIdsToIgnore();
+        setAllowDomainsToBeIgnored( false );
+    }
+
+    private boolean isAllowDomainsToBeIgnored() {
+        return _allow_domains_to_be_ignored;
+    }
+
+    private Set<BinaryDomainCombination> pruneBinaryCombinations( final Set<BinaryDomainCombination> all ) {
+        final Set<BinaryDomainCombination> pruned = new HashSet<BinaryDomainCombination>();
+        for( final BinaryDomainCombination bc : all ) {
+            if ( ( !getDomainIdsToIgnore().contains( bc.getId0() ) )
+                    && ( !getDomainIdsToIgnore().contains( bc.getId1() ) ) ) {
+                pruned.add( bc );
+            }
+        }
+        return pruned;
+    }
+
+    private Set<DomainId> pruneDomains( final Set<DomainId> all ) {
+        final Set<DomainId> pruned = new HashSet<DomainId>();
+        for( final DomainId d : all ) {
+            if ( !getDomainIdsToIgnore().contains( d ) ) {
+                pruned.add( d );
+            }
+        }
+        return pruned;
+    }
+
+    public void setAllowDomainsToBeIgnored( final boolean allow_domains_to_be_ignored ) {
+        forceRecalculation();
+        _allow_domains_to_be_ignored = allow_domains_to_be_ignored;
+    }
+
+    void setDomainIdsToIgnore( final Set<DomainId> domain_ids_to_ignore ) {
+        forceRecalculation();
+        _domain_ids_to_ignore = domain_ids_to_ignore;
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/DomainCountsBasedPairwiseSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/DomainCountsBasedPairwiseSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..d902e75
--- /dev/null
@@ -0,0 +1,41 @@
+// $Id:
+// 04:20:19 cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public class DomainCountsBasedPairwiseSimilarityCalculator implements PairwiseDomainSimilarityCalculator {
+
+    public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1,
+                                                         final CombinableDomains domains_2 ) {
+        if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
+            throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
+        }
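+        // Similarity from key domain copy numbers: 1 - |dc1 - dc2| / (dc1 + dc2),
+        // as computed by CountsBasedPairwiseDomainSimilarity.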
+        final int dc1 = domains_1.getKeyDomainCount();
+        final int dc2 = domains_2.getKeyDomainCount();
+        return new CountsBasedPairwiseDomainSimilarity( dc1 - dc2, dc1 + dc2 );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java
new file mode 100644 (file)
index 0000000..a36aa12
--- /dev/null
@@ -0,0 +1,825 @@
+// $Id:
+// $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.forester.go.GoId;
+import org.forester.go.GoTerm;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+/*
+ * Poorly designed static class which essentially has one method:
+ * calculateCopyNumberDifferences.
+ */
+public final class DomainCountsDifferenceUtil {
+
+    private final static NumberFormat          FORMATTER                                   = new DecimalFormat( "0.0E0" );
+    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES = COPY_CALCULATION_MODE.MIN;
+    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES   = COPY_CALCULATION_MODE.MIN;
+    private static final COPY_CALCULATION_MODE COPY_CALC_MODE_FOR_LOW_COPY_SPECIES         = COPY_CALCULATION_MODE.MAX;
+    private static final String                PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX         = ".prot";
+
+    //FIXME really needs to be tested! 
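+    // Appends, for the given binary domain combination, this genome's count of proteins
+    // exhibiting that combination (or 0 if the genome lacks the combination) to the
+    // per-combination list of per-genome counts.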
+    private static void addCounts( final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                   final BinaryDomainCombination dc,
+                                   final GenomeWideCombinableDomains genome,
+                                   final Set<BinaryDomainCombination> bdc ) {
+        if ( !copy_counts.containsKey( dc ) ) {
+            copy_counts.put( dc, new ArrayList<Integer>() );
+        }
+        if ( bdc.contains( dc )
+                && ( ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc.getId1() ) != null ) ) {
+            final int count = ( ( BasicCombinableDomains ) genome.get( dc.getId0() ) ).getCombiningDomains().get( dc
+                    .getId1() );
+            copy_counts.get( dc ).add( count );
+        }
+        else {
+            copy_counts.get( dc ).add( 0 );
+        }
+    }
+
+    private static void addCounts( final SortedMap<DomainId, List<Integer>> copy_counts,
+                                   final DomainId domain,
+                                   final GenomeWideCombinableDomains genome ) {
+        if ( !copy_counts.containsKey( domain ) ) {
+            copy_counts.put( domain, new ArrayList<Integer>() );
+        }
+        if ( genome.contains( domain ) ) {
+            copy_counts.get( domain ).add( genome.get( domain ).getKeyDomainProteinsCount() );
+        }
+        else {
+            copy_counts.get( domain ).add( 0 );
+        }
+    }
+
+    private static StringBuilder addGoInformation( final DomainId d,
+                                                   final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                   final Map<GoId, GoTerm> go_id_to_term_map ) {
+        final StringBuilder sb = new StringBuilder();
+        if ( ( domain_id_to_go_ids_map == null ) || domain_id_to_go_ids_map.isEmpty()
+                || !domain_id_to_go_ids_map.containsKey( d ) ) {
+            return sb;
+        }
+        final List<GoId> go_ids = domain_id_to_go_ids_map.get( d );
+        for( int i = 0; i < go_ids.size(); ++i ) {
+            final GoId go_id = go_ids.get( i );
+            if ( go_id_to_term_map.containsKey( go_id ) ) {
+                appendGoTerm( sb, go_id_to_term_map.get( go_id ) );
+                sb.append( "<br>" );
+            }
+            else {
+                sb.append( "go id \"" + go_id + "\" not found [" + d.getId() + "]" );
+            }
+        }
+        return sb;
+    }
+
+    private static void appendGoTerm( final StringBuilder sb, final GoTerm go_term ) {
+        final GoId go_id = go_term.getGoId();
+        sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
+                + "</a>" );
+        sb.append( ":" );
+        sb.append( go_term.getName() );
+        sb.append( " [" );
+        sb.append( go_term.getGoNameSpace().toShortString() );
+        sb.append( "]" );
+    }
+
+    public static void calculateCopyNumberDifferences( final List<GenomeWideCombinableDomains> genomes,
+                                                       final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                                       final List<String> high_copy_base_species,
+                                                       final List<String> high_copy_target_species,
+                                                       final List<String> low_copy_species,
+                                                       final int min_diff,
+                                                       final Double factor,
+                                                       final File plain_output_dom,
+                                                       final File html_output_dom,
+                                                       final File html_output_dc,
+                                                       final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                       final Map<GoId, GoTerm> go_id_to_term_map,
+                                                       final File all_domains_go_ids_out_dom,
+                                                       final File passing_domains_go_ids_out_dom,
+                                                       final File proteins_file_base ) throws IOException {
+        if ( genomes.size() < 1 ) {
+            throw new IllegalArgumentException( "attempt to use empty list of genomes for domain difference calculation" );
+        }
+        if ( ( high_copy_base_species.size() < 1 ) || ( low_copy_species.size() < 1 ) ) {
+            throw new IllegalArgumentException( "attempt to use empty list of species for domain difference calculation" );
+        }
+        for( final String species : high_copy_target_species ) {
+            if ( high_copy_base_species.contains( species ) || low_copy_species.contains( species ) ) {
+                throw new IllegalArgumentException( "species [" + species + "] appears in other list as well" );
+            }
+        }
+        if ( min_diff < 0 ) {
+            throw new IllegalArgumentException( "attempt to use negative addition [" + min_diff + "]" );
+        }
+        if ( factor <= 0.0 ) {
+            throw new IllegalArgumentException( "attempt to use factor equal or smaller than 0.0 [" + factor + "]" );
+        }
+        SurfacingUtil.checkForOutputFileWriteability( plain_output_dom );
+        SurfacingUtil.checkForOutputFileWriteability( html_output_dom );
+        SurfacingUtil.checkForOutputFileWriteability( html_output_dc );
+        SurfacingUtil.checkForOutputFileWriteability( all_domains_go_ids_out_dom );
+        SurfacingUtil.checkForOutputFileWriteability( passing_domains_go_ids_out_dom );
+        final Writer plain_writer = new BufferedWriter( new FileWriter( plain_output_dom ) );
+        final Writer html_writer = new BufferedWriter( new FileWriter( html_output_dom ) );
+        final Writer html_writer_dc = new BufferedWriter( new FileWriter( html_output_dc ) );
+        final Writer all_gos_writer = new BufferedWriter( new FileWriter( all_domains_go_ids_out_dom ) );
+        final Writer passing_gos_writer = new BufferedWriter( new FileWriter( passing_domains_go_ids_out_dom ) );
+        final SortedMap<DomainId, Double> high_copy_base_values = new TreeMap<DomainId, Double>();
+        final SortedMap<DomainId, Double> high_copy_target_values = new TreeMap<DomainId, Double>();
+        final SortedMap<DomainId, Double> low_copy_values = new TreeMap<DomainId, Double>();
+        final SortedMap<DomainId, List<Integer>> high_copy_base_copy_counts = new TreeMap<DomainId, List<Integer>>();
+        final SortedMap<DomainId, List<Integer>> high_copy_target_copy_counts = new TreeMap<DomainId, List<Integer>>();
+        final SortedMap<DomainId, List<Integer>> low_copy_copy_counts = new TreeMap<DomainId, List<Integer>>();
+        final SortedSet<DomainId> all_domains = new TreeSet<DomainId>();
+        final SortedMap<BinaryDomainCombination, Double> high_copy_base_values_dc = new TreeMap<BinaryDomainCombination, Double>();
+        final SortedMap<BinaryDomainCombination, Double> high_copy_target_values_dc = new TreeMap<BinaryDomainCombination, Double>();
+        final SortedMap<BinaryDomainCombination, Double> low_copy_values_dc = new TreeMap<BinaryDomainCombination, Double>();
+        final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_base_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
+        final SortedMap<BinaryDomainCombination, List<Integer>> high_copy_target_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
+        final SortedMap<BinaryDomainCombination, List<Integer>> low_copy_copy_counts_dc = new TreeMap<BinaryDomainCombination, List<Integer>>();
+        final SortedSet<BinaryDomainCombination> all_dcs = new TreeSet<BinaryDomainCombination>();
+        final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome = new HashMap<String, Set<BinaryDomainCombination>>();
+        final SortedSet<GoId> go_ids_of_passing_domains = new TreeSet<GoId>();
+        final SortedSet<GoId> go_ids_all = new TreeSet<GoId>();
+        for( final GenomeWideCombinableDomains genome : genomes ) {
+            final SortedSet<DomainId> domains = genome.getAllDomainIds();
+            final SortedSet<BinaryDomainCombination> dcs = genome.toBinaryDomainCombinations();
+            final String species = genome.getSpecies().getSpeciesId();
+            bdcs_per_genome.put( species, genome.toBinaryDomainCombinations() );
+            for( final DomainId d : domains ) {
+                all_domains.add( d );
+                if ( domain_id_to_go_ids_map.containsKey( d ) ) {
+                    go_ids_all.addAll( domain_id_to_go_ids_map.get( d ) );
+                }
+            }
+            for( final BinaryDomainCombination dc : dcs ) {
+                all_dcs.add( dc );
+            }
+        }
+        for( final DomainId domain : all_domains ) {
+            for( final GenomeWideCombinableDomains genome : genomes ) {
+                final String species = genome.getSpecies().getSpeciesId();
+                if ( high_copy_base_species.contains( species ) ) {
+                    DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts, domain, genome );
+                }
+                if ( high_copy_target_species.contains( species ) ) {
+                    DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts, domain, genome );
+                }
+                if ( low_copy_species.contains( species ) ) {
+                    DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts, domain, genome );
+                }
+            }
+        }
+        for( final BinaryDomainCombination dc : all_dcs ) {
+            for( final GenomeWideCombinableDomains genome : genomes ) {
+                final String species = genome.getSpecies().getSpeciesId();
+                if ( high_copy_base_species.contains( species ) ) {
+                    DomainCountsDifferenceUtil.addCounts( high_copy_base_copy_counts_dc, dc, genome, bdcs_per_genome
+                            .get( species ) );
+                }
+                if ( high_copy_target_species.contains( species ) ) {
+                    DomainCountsDifferenceUtil.addCounts( high_copy_target_copy_counts_dc, dc, genome, bdcs_per_genome
+                            .get( species ) );
+                }
+                if ( low_copy_species.contains( species ) ) {
+                    DomainCountsDifferenceUtil.addCounts( low_copy_copy_counts_dc, dc, genome, bdcs_per_genome
+                            .get( species ) );
+                }
+            }
+        }
+        for( final DomainId domain : all_domains ) {
+            calculateDomainCountsBasedValue( high_copy_target_values,
+                                             high_copy_target_copy_counts,
+                                             domain,
+                                             COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
+            calculateDomainCountsBasedValue( high_copy_base_values,
+                                             high_copy_base_copy_counts,
+                                             domain,
+                                             COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
+            calculateDomainCountsBasedValue( low_copy_values,
+                                             low_copy_copy_counts,
+                                             domain,
+                                             COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
+        }
+        for( final BinaryDomainCombination dc : all_dcs ) {
+            calculateDomainCountsBasedValue( high_copy_target_values_dc,
+                                             high_copy_target_copy_counts_dc,
+                                             dc,
+                                             COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
+            calculateDomainCountsBasedValue( high_copy_base_values_dc,
+                                             high_copy_base_copy_counts_dc,
+                                             dc,
+                                             COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
+            calculateDomainCountsBasedValue( low_copy_values_dc,
+                                             low_copy_copy_counts_dc,
+                                             dc,
+                                             COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
+        }
+        writeDomainValuesToFiles( genomes,
+                                  high_copy_base_species,
+                                  high_copy_target_species,
+                                  low_copy_species,
+                                  min_diff,
+                                  factor,
+                                  domain_id_to_go_ids_map,
+                                  go_id_to_term_map,
+                                  plain_writer,
+                                  html_writer,
+                                  proteins_file_base,
+                                  high_copy_base_values,
+                                  high_copy_target_values,
+                                  low_copy_values,
+                                  all_domains,
+                                  go_ids_of_passing_domains,
+                                  protein_lists_per_species );
+        writeDomainCombinationValuesToFiles( genomes,
+                                             high_copy_base_species,
+                                             high_copy_target_species,
+                                             low_copy_species,
+                                             min_diff,
+                                             factor,
+                                             html_writer_dc,
+                                             high_copy_base_values_dc,
+                                             high_copy_target_values_dc,
+                                             low_copy_values_dc,
+                                             all_dcs,
+                                             bdcs_per_genome );
+        writeGoIdsToFile( all_gos_writer, go_ids_all );
+        writeGoIdsToFile( passing_gos_writer, go_ids_of_passing_domains );
+    }
+
+    private static void calculateDomainCountsBasedValue( final SortedMap<BinaryDomainCombination, Double> copy_values,
+                                                         final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                                         final BinaryDomainCombination bdc,
+                                                         final COPY_CALCULATION_MODE copy_calc_mode ) {
+        if ( copy_counts.containsKey( bdc ) ) {
+            switch ( copy_calc_mode ) {
+                case MAX:
+                    DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, bdc );
+                    break;
+                case MIN:
+                    DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, bdc );
+                    break;
+                case MEAN:
+                    DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, bdc );
+                    break;
+                case MEDIAN:
+                    DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, bdc );
+                    break;
+                default:
+                    throw new IllegalArgumentException();
+            }
+        }
+        else {
+            copy_values.put( bdc, Double.valueOf( 0.0 ) );
+        }
+    }
+
+    private static void calculateDomainCountsBasedValue( final SortedMap<DomainId, Double> copy_values,
+                                                         final SortedMap<DomainId, List<Integer>> copy_counts,
+                                                         final DomainId domain,
+                                                         final COPY_CALCULATION_MODE copy_calc_mode ) {
+        if ( copy_counts.containsKey( domain ) ) {
+            switch ( copy_calc_mode ) {
+                case MAX:
+                    DomainCountsDifferenceUtil.calculateMaxCount( copy_values, copy_counts, domain );
+                    break;
+                case MIN:
+                    DomainCountsDifferenceUtil.calculateMinCount( copy_values, copy_counts, domain );
+                    break;
+                case MEAN:
+                    DomainCountsDifferenceUtil.calculateMeanCount( copy_values, copy_counts, domain );
+                    break;
+                case MEDIAN:
+                    DomainCountsDifferenceUtil.calculateMedianCount( copy_values, copy_counts, domain );
+                    break;
+                default:
+                    throw new IllegalArgumentException();
+            }
+        }
+        else {
+            copy_values.put( domain, Double.valueOf( 0.0 ) );
+        }
+    }
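+
+    // Example with hypothetical counts for calculateDomainCountsBasedValue(): if the
+    // copy counts collected for a domain (or binary domain combination) across three
+    // genomes are [ 2, 5, 3 ], the stored value per COPY_CALCULATION_MODE is:
+    // MAX -> 5.0, MIN -> 2.0, MEAN -> ( 2 + 5 + 3 ) / 3 = 3.33..., MEDIAN -> 3.0.
+    // Entries with no recorded counts are assigned 0.0.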
+
+    private static void calculateMaxCount( final SortedMap<BinaryDomainCombination, Double> results,
+                                           final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                           final BinaryDomainCombination bdc ) {
+        final List<Integer> counts = copy_counts.get( bdc );
+        int max = 0;
+        for( final Integer count : counts ) {
+            if ( count > max ) {
+                max = count;
+            }
+        }
+        results.put( bdc, ( double ) max );
+    }
+
+    private static void calculateMaxCount( final SortedMap<DomainId, Double> results,
+                                           final SortedMap<DomainId, List<Integer>> copy_counts,
+                                           final DomainId domain ) {
+        final List<Integer> counts = copy_counts.get( domain );
+        int max = 0;
+        for( final Integer count : counts ) {
+            if ( count > max ) {
+                max = count;
+            }
+        }
+        results.put( domain, ( double ) max );
+    }
+
+    private static void calculateMeanCount( final SortedMap<BinaryDomainCombination, Double> results,
+                                            final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                            final BinaryDomainCombination bdc ) {
+        final List<Integer> counts = copy_counts.get( bdc );
+        int sum = 0;
+        for( final Integer count : counts ) {
+            sum += count;
+        }
+        results.put( bdc, ( ( double ) sum ) / ( ( double ) counts.size() ) );
+    }
+
+    private static void calculateMeanCount( final SortedMap<DomainId, Double> results,
+                                            final SortedMap<DomainId, List<Integer>> copy_counts,
+                                            final DomainId domain ) {
+        final List<Integer> counts = copy_counts.get( domain );
+        int sum = 0;
+        for( final Integer count : counts ) {
+            sum += count;
+        }
+        results.put( domain, ( ( double ) sum ) / ( ( double ) counts.size() ) );
+    }
+
+    private static void calculateMedianCount( final SortedMap<BinaryDomainCombination, Double> results,
+                                              final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                              final BinaryDomainCombination bdc ) {
+        final List<Integer> counts = copy_counts.get( bdc );
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final Integer count : counts ) {
+            stats.addValue( count );
+        }
+        results.put( bdc, stats.median() );
+    }
+
+    private static void calculateMedianCount( final SortedMap<DomainId, Double> results,
+                                              final SortedMap<DomainId, List<Integer>> copy_counts,
+                                              final DomainId domain ) {
+        final List<Integer> counts = copy_counts.get( domain );
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final Integer count : counts ) {
+            stats.addValue( count );
+        }
+        results.put( domain, stats.median() );
+    }
+
+    private static void calculateMinCount( final SortedMap<BinaryDomainCombination, Double> results,
+                                           final SortedMap<BinaryDomainCombination, List<Integer>> copy_counts,
+                                           final BinaryDomainCombination bdc ) {
+        final List<Integer> counts = copy_counts.get( bdc );
+        int min = Integer.MAX_VALUE;
+        for( final Integer count : counts ) {
+            if ( count < min ) {
+                min = count;
+            }
+        }
+        results.put( bdc, ( double ) min );
+    }
+
+    private static void calculateMinCount( final SortedMap<DomainId, Double> results,
+                                           final SortedMap<DomainId, List<Integer>> copy_counts,
+                                           final DomainId domain ) {
+        final List<Integer> counts = copy_counts.get( domain );
+        int min = Integer.MAX_VALUE;
+        for( final Integer count : counts ) {
+            if ( count < min ) {
+                min = count;
+            }
+        }
+        results.put( domain, ( double ) min );
+    }
+
+    private static String combinableDomaindToString( final CombinableDomains cd ) {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( cd.getKeyDomainProteinsCount() );
+        sb.append( "\t[" );
+        sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
+        sb.append( "]" );
+        return sb.toString();
+    }
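+
+    // Example output of combinableDomaindToString() (hypothetical values): for a key
+    // domain found in 12 proteins with a median confidence of 0.002 this yields
+    // "12\t[0.002]" (the exact number format depends on FORMATTER).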
+
+    private static String combinableDomaindToStringHtml( final CombinableDomains cd ) {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( "[" );
+        sb.append( cd.getKeyDomainCount() );
+        sb.append( ", <b>" );
+        sb.append( cd.getKeyDomainProteinsCount() );
+        sb.append( "</b>, " );
+        sb.append( cd.getNumberOfCombinableDomains() );
+        sb.append( "]</td><td>[" );
+        sb.append( FORMATTER.format( cd.getKeyDomainConfidenceDescriptiveStatistics().median() ) );
+        sb.append( "]</td><td>" );
+        sb.append( cd.getCombiningDomainIdsAsStringBuilder() );
+        return sb.toString();
+    }
+
+    private static void writeCopyNumberValues( final SortedMap<BinaryDomainCombination, Double> copy_means,
+                                               final BinaryDomainCombination bdc,
+                                               final GenomeWideCombinableDomains genome,
+                                               final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome,
+                                               final String species,
+                                               final Writer html_writer,
+                                               final String color ) throws IOException {
+        html_writer.write( "<td> " );
+        if ( !ForesterUtil.isEmpty( color ) ) {
+            html_writer.write( "<font color=\"" + color + "\">" );
+        }
+        html_writer.write( "<b>" + species + ":</b> " );
+        if ( !ForesterUtil.isEmpty( color ) ) {
+            html_writer.write( "</font>" );
+        }
+        html_writer.write( "</td><td>" );
+        if ( bdcs_per_genome.get( species ).contains( bdc ) && ( copy_means.get( bdc ) > 0 ) ) {
+            final int count = ( ( BasicCombinableDomains ) genome.get( bdc.getId0() ) ).getCombiningDomains().get( bdc
+                    .getId1() );
+            html_writer.write( count + "" );
+        }
+        else {
+            html_writer.write( "0" );
+        }
+        html_writer.write( "</td>" );
+    }
+
+    private static void writeCopyNumberValues( final SortedMap<DomainId, Double> copy_means,
+                                               final DomainId domain,
+                                               final GenomeWideCombinableDomains genome,
+                                               final String species,
+                                               final Writer plain_writer,
+                                               final Writer html_writer,
+                                               final String color ) throws IOException {
+        plain_writer.write( "  " + species + "\t" );
+        html_writer.write( "<td> " );
+        if ( !ForesterUtil.isEmpty( color ) ) {
+            html_writer.write( "<font color=\"" + color + "\">" );
+        }
+        html_writer.write( "<b>" + species + ":</b> " );
+        if ( !ForesterUtil.isEmpty( color ) ) {
+            html_writer.write( "</font>" );
+        }
+        html_writer.write( "</td><td>" );
+        if ( genome.contains( domain ) && ( copy_means.get( domain ) > 0 ) ) {
+            plain_writer.write( DomainCountsDifferenceUtil.combinableDomaindToString( genome.get( domain ) ) );
+            html_writer.write( DomainCountsDifferenceUtil.combinableDomaindToStringHtml( genome.get( domain ) ) );
+        }
+        else {
+            plain_writer.write( "0" );
+            html_writer.write( "0" );
+        }
+        html_writer.write( "</td>" );
+        plain_writer.write( SurfacingConstants.NL );
+    }
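+
+    // Example of the fragments emitted by writeCopyNumberValues() (hypothetical
+    // species id): the plain writer receives "  HUMAN" followed by a tab, the HTML
+    // writer a species cell such as <td> <font color="#A0A0A0"><b>HUMAN:</b> </font></td>,
+    // followed by a value cell holding either the formatted counts or "0".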
+
+    private static void writeDomainCombinationValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
+                                                             final List<String> high_copy_base_species,
+                                                             final List<String> high_copy_target_species,
+                                                             final List<String> low_copy_species,
+                                                             final int min_diff,
+                                                             final Double factor,
+                                                             final Writer html_writer,
+                                                             final SortedMap<BinaryDomainCombination, Double> high_copy_base_values,
+                                                             final SortedMap<BinaryDomainCombination, Double> high_copy_target_values,
+                                                             final SortedMap<BinaryDomainCombination, Double> low_copy_values,
+                                                             final SortedSet<BinaryDomainCombination> all_bdcs,
+                                                             final Map<String, Set<BinaryDomainCombination>> bdcs_per_genome )
+            throws IOException {
+        int counter = 0;
+        int total_absence_counter = 0;
+        int not_total_absence_counter = 0;
+        SurfacingUtil.addHtmlHead( html_writer, "Binary Domain Combination Copy Differences" );
+        html_writer.write( "<body><table>" );
+        for( final BinaryDomainCombination bdc : all_bdcs ) {
+            if ( ( high_copy_base_values.get( bdc ) > 0 ) && ( high_copy_target_values.get( bdc ) > 0 )
+                    && ( high_copy_base_values.get( bdc ) >= low_copy_values.get( bdc ) ) ) {
+                if ( high_copy_target_values.get( bdc ) >= min_diff + ( factor * low_copy_values.get( bdc ) ) ) {
+                    if ( low_copy_values.get( bdc ) <= 0.0 ) {
+                        ++total_absence_counter;
+                    }
+                    else {
+                        ++not_total_absence_counter;
+                    }
+                    ++counter;
+                    html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + bdc.getId0()
+                            + "\">" + bdc.getId0() + "</a> = <a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
+                            + bdc.getId1() + "\">" + bdc.getId1() + "</a>" );
+                    html_writer.write( "</td><td>" );
+                    html_writer.write( "<table>" );
+                    for( final GenomeWideCombinableDomains genome : genomes ) {
+                        final String species = genome.getSpecies().getSpeciesId();
+                        if ( high_copy_target_species.contains( species ) ) {
+                            html_writer.write( "<tr>" );
+                            writeCopyNumberValues( high_copy_target_values,
+                                                   bdc,
+                                                   genome,
+                                                   bdcs_per_genome,
+                                                   species,
+                                                   html_writer,
+                                                   "#0000FF" );
+                            html_writer.write( "</tr>" );
+                        }
+                        else if ( low_copy_species.contains( species ) ) {
+                            html_writer.write( "<tr>" );
+                            writeCopyNumberValues( low_copy_values,
+                                                   bdc,
+                                                   genome,
+                                                   bdcs_per_genome,
+                                                   species,
+                                                   html_writer,
+                                                   "#A0A0A0" );
+                            html_writer.write( "</tr>" );
+                        }
+                        else if ( high_copy_base_species.contains( species ) ) {
+                            html_writer.write( "<tr>" );
+                            writeCopyNumberValues( high_copy_base_values,
+                                                   bdc,
+                                                   genome,
+                                                   bdcs_per_genome,
+                                                   species,
+                                                   html_writer,
+                                                   "#404040" );
+                            html_writer.write( "</tr>" );
+                        }
+                    }
+                    html_writer.write( "</table>" );
+                    html_writer.write( "</td></tr>" );
+                    html_writer.write( SurfacingConstants.NL );
+                }
+            }
+        }
+        html_writer.write( "</table>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<hr>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
+        html_writer.write( "<br>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
+        html_writer.write( "<br>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Minimal difference : " + min_diff );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Factor : " + factor );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Lower copy binary domain combinations : " + counter );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Total absence : " + total_absence_counter );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Not total absence : " + not_total_absence_counter );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Total binary domain combinations : " + all_bdcs.size() );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<hr>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "</body></html>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.close();
+    }
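+
+    // Worked example for Rule 1 and Rule 2 reported by writeDomainCombinationValuesToFiles()
+    // (hypothetical numbers): with min_diff = 2 and factor = 2.0, a combination whose
+    // low-copy value is 1 passes Rule 2 only if its high-copy-target value is
+    // >= 2 + ( 2.0 * 1 ) = 4; among combinations passing both rules, those with a
+    // low-copy value of 0 are counted as "total absence", the others as "not total absence".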
+
+    private static void writeDomainValuesToFiles( final List<GenomeWideCombinableDomains> genomes,
+                                                  final List<String> high_copy_base_species,
+                                                  final List<String> high_copy_target_species,
+                                                  final List<String> low_copy_species,
+                                                  final int min_diff,
+                                                  final Double factor,
+                                                  final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                  final Map<GoId, GoTerm> go_id_to_term_map,
+                                                  final Writer plain_writer,
+                                                  final Writer html_writer,
+                                                  final File proteins_file_base,
+                                                  final SortedMap<DomainId, Double> high_copy_base_values,
+                                                  final SortedMap<DomainId, Double> high_copy_target_values,
+                                                  final SortedMap<DomainId, Double> low_copy_values,
+                                                  final SortedSet<DomainId> all_domains,
+                                                  final SortedSet<GoId> go_ids_of_passing_domains,
+                                                  final SortedMap<Species, List<Protein>> protein_lists_per_species )
+            throws IOException {
+        int counter = 0;
+        int total_absence_counter = 0;
+        int not_total_absence_counter = 0;
+        SurfacingUtil.addHtmlHead( html_writer, "Domain Copy Differences" );
+        html_writer.write( "<body><table>" );
+        for( final DomainId domain_id : all_domains ) {
+            if ( ( high_copy_base_values.get( domain_id ) > 0 ) && ( high_copy_target_values.get( domain_id ) > 0 )
+                    && ( high_copy_base_values.get( domain_id ) >= low_copy_values.get( domain_id ) ) ) {
+                if ( high_copy_target_values.get( domain_id ) >= min_diff
+                        + ( factor * low_copy_values.get( domain_id ) ) ) {
+                    if ( low_copy_values.get( domain_id ) <= 0.0 ) {
+                        ++total_absence_counter;
+                    }
+                    else {
+                        ++not_total_absence_counter;
+                    }
+                    ++counter;
+                    writeProteinsToFile( proteins_file_base, protein_lists_per_species, domain_id );
+                    if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
+                        go_ids_of_passing_domains.addAll( domain_id_to_go_ids_map.get( domain_id ) );
+                    }
+                    plain_writer.write( domain_id.getId() );
+                    plain_writer.write( SurfacingConstants.NL );
+                    html_writer.write( "<tr><td><a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK
+                            + domain_id.getId() + "\">" + domain_id.getId() + "</a></td><td>" );
+                    html_writer.write( addGoInformation( domain_id, domain_id_to_go_ids_map, go_id_to_term_map )
+                            .toString() );
+                    html_writer.write( "</td><td>" );
+                    html_writer.write( "<table>" );
+                    for( final GenomeWideCombinableDomains genome : genomes ) {
+                        final String species = genome.getSpecies().getSpeciesId();
+                        if ( high_copy_target_species.contains( species ) ) {
+                            html_writer.write( "<tr>" );
+                            writeCopyNumberValues( high_copy_target_values,
+                                                   domain_id,
+                                                   genome,
+                                                   species,
+                                                   plain_writer,
+                                                   html_writer,
+                                                   "#0000FF" );
+                            html_writer.write( "</tr>" );
+                        }
+                        else if ( low_copy_species.contains( species ) ) {
+                            html_writer.write( "<tr>" );
+                            writeCopyNumberValues( low_copy_values,
+                                                   domain_id,
+                                                   genome,
+                                                   species,
+                                                   plain_writer,
+                                                   html_writer,
+                                                   "#A0A0A0" );
+                            html_writer.write( "</tr>" );
+                        }
+                        else if ( high_copy_base_species.contains( species ) ) {
+                            html_writer.write( "<tr>" );
+                            writeCopyNumberValues( high_copy_base_values,
+                                                   domain_id,
+                                                   genome,
+                                                   species,
+                                                   plain_writer,
+                                                   html_writer,
+                                                   "#404040" );
+                            html_writer.write( "</tr>" );
+                        }
+                    }
+                    html_writer.write( "</table>" );
+                    html_writer.write( "</td></tr>" );
+                    html_writer.write( SurfacingConstants.NL );
+                    plain_writer.write( SurfacingConstants.NL );
+                }
+            }
+        }
+        html_writer.write( "</table>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<hr>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
+        html_writer.write( "<br>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
+        html_writer.write( "<br>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "Calculation mode for high copy target : " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Calculation mode for high copy base : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Calculation mode for low copy : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Minimal difference : " + min_diff );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Factor : " + factor );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Lower copy domains : " + counter );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Total absence : " + total_absence_counter );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Not total absence : " + not_total_absence_counter );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<br>" );
+        html_writer.write( "Total domains : " + all_domains.size() );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "<hr>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.write( "</body></html>" );
+        html_writer.write( SurfacingConstants.NL );
+        html_writer.close();
+        plain_writer.write( "# Rule 1: high-copy-base > 0 && high-copy-target > 0 && high-copy-base >= low-copy" );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Rule 2: high-copy-target >= minimal-difference + ( factor * low-copy )" );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Calculation mode for high copy target: " + COPY_CALC_MODE_FOR_HIGH_COPY_TARGET_SPECIES );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Calculation mode for high copy base  : " + COPY_CALC_MODE_FOR_HIGH_COPY_BASE_SPECIES );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Calculation mode for low copy        : " + COPY_CALC_MODE_FOR_LOW_COPY_SPECIES );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Minimal difference: " + min_diff );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Factor            : " + factor );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Lower copy domains: " + counter );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Total absence     : " + total_absence_counter );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Not total absence : " + not_total_absence_counter );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.write( "# Total domains     : " + all_domains.size() );
+        plain_writer.write( SurfacingConstants.NL );
+        plain_writer.close();
+    }
+
+    private static void writeGoIdsToFile( final Writer writer, final SortedSet<GoId> gos ) throws IOException {
+        for( final GoId go_id : gos ) {
+            writer.write( go_id.toString() );
+            writer.write( SurfacingConstants.NL );
+        }
+        writer.close();
+    }
+
+    private static void writeProteinsToFile( final File proteins_file_base,
+                                             final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                             final DomainId domain_id ) throws IOException {
+        final File my_proteins_file = new File( proteins_file_base.getParentFile() + ForesterUtil.FILE_SEPARATOR
+                + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX );
+        SurfacingUtil.checkForOutputFileWriteability( my_proteins_file );
+        final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) );
+        SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" );
+        proteins_file_writer.close();
+        System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" );
+    }
+
+    public static enum COPY_CALCULATION_MODE {
+        MEAN, MEDIAN, MAX, MIN
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainId.java b/forester/java/src/org/forester/surfacing/DomainId.java
new file mode 100644 (file)
index 0000000..18a4a4d
--- /dev/null
@@ -0,0 +1,131 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.go.GoId;
+import org.forester.util.ForesterUtil;
+
+public class DomainId implements Comparable<DomainId> {
+
+    final private String _id;
+    private List<GoId>   _go_ids;
+
+    public DomainId( final String id ) {
+        if ( ForesterUtil.isEmpty( id ) ) {
+            throw new IllegalArgumentException( "attempt to create domain id from empty or null string" );
+        }
+        _id = id.trim();
+        if ( _id.indexOf( ' ' ) > -1 ) {
+            throw new IllegalArgumentException( "attempt to create domain id from string containing one or more spaces ["
+                    + _id + "]" );
+        }
+        else if ( _id.indexOf( BinaryDomainCombination.SEPARATOR ) > -1 ) {
+            throw new IllegalArgumentException( "attempt to create domain id from string containing the separator character ["
+                    + BinaryDomainCombination.SEPARATOR + "] for domain combinations [" + _id + "]" );
+        }
+        setGoIds( null );
+    }
+
+    public void addGoId( final GoId go_id ) {
+        if ( getGoIds() == null ) {
+            setGoIds( new ArrayList<GoId>() );
+        }
+        getGoIds().add( go_id );
+    }
+
+    @Override
+    public int compareTo( final DomainId domain_id ) {
+        if ( this == domain_id ) {
+            return 0;
+        }
+        return getId().toLowerCase().compareTo( domain_id.getId().toLowerCase() );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
+                    + o.getClass() + "]" );
+        }
+        else {
+            return getId().equals( ( ( DomainId ) o ).getId() );
+        }
+    }
+
+    public GoId getGoId( final int i ) {
+        return getGoIds().get( i );
+    }
+
+    // Note.
+    // The fact that equals and compareTo do not behave the same in cases where ids only differ by their case
+    // is not ideal. From Sun regarding Interface SortedSet<E>:
+    // "Note that the ordering maintained by a sorted set (whether or not an explicit comparator is provided)
+    // must be consistent with equals if the sorted set is to correctly implement the Set interface.
+    // (See the Comparable interface or Comparator interface for a precise definition of consistent 
+    // with equals.) This is so because the Set interface is defined in terms of the equals  operation,
+    // but a sorted set performs all element comparisons using its compareTo (or compare) method, 
+    // so two elements that are deemed equal by this method are, from the standpoint of the sorted set,
+    // equal. The behavior of a sorted set is well-defined even if its ordering is inconsistent with equals; 
+    // it just fails to obey the general contract of the Set interface."
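+    // For example (hypothetical ids): new DomainId( "Kinase" ) and new DomainId( "kinase" )
+    // are not equal according to equals(), yet compareTo() returns 0 for them, so a
+    // TreeSet<DomainId> would retain only one of the two.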
+    List<GoId> getGoIds() {
+        return _go_ids;
+    }
+
+    public String getId() {
+        return _id;
+    }
+
+    public int getNumberOfGoIds() {
+        if ( getGoIds() == null ) {
+            return 0;
+        }
+        return getGoIds().size();
+    }
+
+    @Override
+    public int hashCode() {
+        return getId().hashCode();
+    }
+
+    private void setGoIds( final List<GoId> go_ids ) {
+        _go_ids = go_ids;
+    }
+
+    @Override
+    public String toString() {
+        return getId();
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainLengths.java b/forester/java/src/org/forester/surfacing/DomainLengths.java
new file mode 100644 (file)
index 0000000..9e39b9e
--- /dev/null
@@ -0,0 +1,143 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2010 Christian M. Zmasek
+// Copyright (C) 2008-2010 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+
+public class DomainLengths {
+
+    final DomainId                                  _domain_id;
+    final SortedMap<Species, DescriptiveStatistics> _length_statistics;
+
+    public DomainLengths( final DomainId domain_id ) {
+        _domain_id = domain_id;
+        _length_statistics = new TreeMap<Species, DescriptiveStatistics>();
+    }
+
+    public void addLength( final Species species, final int domain_length ) {
+        if ( !getLengthStatistics().containsKey( species ) ) {
+            addLengthStatistics( species, new BasicDescriptiveStatistics() );
+        }
+        getLengthStatistic( species ).addValue( domain_length );
+    }
+
+    private void addLengthStatistics( final Species species, final DescriptiveStatistics length_statistic ) {
+        if ( getLengthStatistics().containsKey( species ) ) {
+            throw new IllegalArgumentException( "length statistics for [" + species.getSpeciesId() + "] already added" );
+        }
+        getLengthStatistics().put( species, length_statistic );
+    }
+
+    /**
+     * Returns descriptive statistics calculated over the arithmetic means
+     * of this domain's lengths for each species.
+     *
+     * @return descriptive statistics of the per-species mean domain lengths
+     */
+    public DescriptiveStatistics calculateMeanBasedStatistics() {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final DescriptiveStatistics s : getLengthStatisticsList() ) {
+            stats.addValue( s.arithmeticMean() );
+        }
+        return stats;
+    }
+
+    /**
+     * Calculates how far the mean domain length in the given species deviates from
+     * the mean of the per-species means, in units of their sample standard deviation.
+     * Note: this is not technically a Z-score, since the distribution
+     * of the means is unknown (and not normal).
+     *
+     * @param species the species whose mean domain length is to be compared
+     * @return the (pseudo) Z-score for the given species
+     */
+    public double calculateZScoreForSpecies( final Species species ) {
+        final double species_mean = getLengthStatistic( species ).arithmeticMean();
+        final DescriptiveStatistics domain_stats = calculateMeanBasedStatistics();
+        final double population_sd = domain_stats.sampleStandardDeviation();
+        final double population_mean = domain_stats.arithmeticMean();
+        return ( species_mean - population_mean ) / population_sd;
+    }
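+
+    // Example for calculateZScoreForSpecies() with hypothetical numbers: if the mean
+    // domain length in the given species is 250, the mean of the per-species means is
+    // 200 and their sample standard deviation is 25, the returned value is
+    // ( 250 - 200 ) / 25 = 2.0.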
+
+    public DomainId getDomainId() {
+        return _domain_id;
+    }
+
+    public DescriptiveStatistics getLengthStatistic( final Species species ) {
+        return getLengthStatistics().get( species );
+    }
+
+    private SortedMap<Species, DescriptiveStatistics> getLengthStatistics() {
+        return _length_statistics;
+    }
+
+    public List<DescriptiveStatistics> getLengthStatisticsList() {
+        final List<DescriptiveStatistics> list = new ArrayList<DescriptiveStatistics>();
+        for( final DescriptiveStatistics stats : _length_statistics.values() ) {
+            list.add( stats );
+        }
+        return list;
+    }
+
+    public List<Species> getMeanBasedOutlierSpecies( final double z_score_limit ) {
+        final List<Species> species = new ArrayList<Species>();
+        if ( getSpeciesList().size() > 1 ) {
+            for( final Species s : getSpeciesList() ) {
+                final double z = calculateZScoreForSpecies( s );
+                if ( z_score_limit < 0 ) {
+                    if ( z <= z_score_limit ) {
+                        species.add( s );
+                    }
+                }
+                else if ( z_score_limit > 0 ) {
+                    if ( z >= z_score_limit ) {
+                        species.add( s );
+                    }
+                }
+            }
+        }
+        return species;
+    }
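+
+    // In getMeanBasedOutlierSpecies() the sign of z_score_limit selects the direction of
+    // the outliers: a negative limit (e.g. -2.0) returns species with unusually short mean
+    // domain lengths (z <= limit), a positive limit (e.g. 2.0) species with unusually long
+    // ones (z >= limit); a limit of exactly 0, or a single known species, yields an empty list.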
+
+    public List<Species> getSpeciesList() {
+        final List<Species> list = new ArrayList<Species>();
+        for( final Species s : _length_statistics.keySet() ) {
+            list.add( s );
+        }
+        return list;
+    }
+
+    public boolean isHasLengthStatistic( final Species species ) {
+        return getLengthStatistics().containsKey( species );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainLengthsTable.java b/forester/java/src/org/forester/surfacing/DomainLengthsTable.java
new file mode 100644 (file)
index 0000000..4b6ca22
--- /dev/null
@@ -0,0 +1,165 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2010 Christian M. Zmasek
+// Copyright (C) 2008-2010 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+public class DomainLengthsTable {
+
+    private final static DecimalFormat       DF = new DecimalFormat( "#.0" );
+    final SortedMap<DomainId, DomainLengths> _domain_lengths;
+    final List<Species>                      _species;
+
+    public DomainLengthsTable() {
+        _domain_lengths = new TreeMap<DomainId, DomainLengths>();
+        _species = new ArrayList<Species>();
+    }
+
+    private void addDomainLengths( final DomainLengths domain_lengths ) {
+        if ( getDomainLengths().containsKey( domain_lengths.getDomainId() ) ) {
+            throw new IllegalArgumentException( "domain lengths for [" + domain_lengths.getDomainId()
+                    + "] already added" );
+        }
+        getDomainLengths().put( domain_lengths.getDomainId(), domain_lengths );
+    }
+
+    private void addLength( final DomainId domain_id, final Species species, final int domain_length ) {
+        if ( !getDomainLengths().containsKey( domain_id ) ) {
+            addDomainLengths( new DomainLengths( domain_id ) );
+        }
+        getDomainLengths().get( domain_id ).addLength( species, domain_length );
+    }
+
+    public void addLengths( final List<Protein> protein_list ) {
+        for( final Protein protein : protein_list ) {
+            final Species species = protein.getSpecies();
+            if ( !_species.contains( species ) ) {
+                _species.add( species );
+            }
+            for( final Domain domain : protein.getProteinDomains() ) {
+                addLength( domain.getDomainId(), species, ( domain.getTo() - domain.getFrom() ) + 1 );
+            }
+        }
+    }
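+
+    // Example for addLengths() (hypothetical coordinates): a domain instance with
+    // from = 10 and to = 59 contributes a length of ( 59 - 10 ) + 1 = 50 to the
+    // statistics kept for its domain id and species.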
+
+    public DescriptiveStatistics calculateMeanBasedStatisticsForAllSpecies() {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final Species species : getSpecies() ) {
+            final DescriptiveStatistics stats_per_species = calculateMeanBasedStatisticsForSpecies( species );
+            stats.addValue( stats_per_species.arithmeticMean() );
+        }
+        return stats;
+    }
+
+    public DescriptiveStatistics calculateMeanBasedStatisticsForDomain( final DomainId domain_id ) {
+        return getDomainLengths( domain_id ).calculateMeanBasedStatistics();
+    }
+
+    public DescriptiveStatistics calculateMeanBasedStatisticsForSpecies( final Species species ) {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final DomainLengths l : getDomainLengths().values() ) {
+            if ( l.isHasLengthStatistic( species ) ) {
+                stats.addValue( l.getLengthStatistic( species ).arithmeticMean() );
+            }
+        }
+        return stats;
+    }
+
+    public StringBuilder createMeanBasedStatisticsPerSpeciesTable() {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( "SPECIES" );
+        sb.append( "\t" );
+        sb.append( "MEAN" );
+        sb.append( "\t" );
+        sb.append( "SD" );
+        sb.append( "\t" );
+        sb.append( "MIN" );
+        sb.append( "\t" );
+        sb.append( "MAX" );
+        sb.append( "\t" );
+        sb.append( "MEDIAN" );
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        for( final Species species : getSpecies() ) {
+            final DescriptiveStatistics stats = calculateMeanBasedStatisticsForSpecies( species );
+            sb.append( species );
+            sb.append( "\t" );
+            sb.append( DF.format( stats.arithmeticMean() ) );
+            sb.append( "\t" );
+            try {
+                sb.append( DF.format( stats.sampleStandardDeviation() ) );
+            }
+            catch ( final ArithmeticException e ) {
+                sb.append( "" );
+            }
+            sb.append( "\t" );
+            sb.append( DF.format( stats.getMin() ) );
+            sb.append( "\t" );
+            sb.append( DF.format( stats.getMax() ) );
+            sb.append( "\t" );
+            try {
+                sb.append( DF.format( stats.median() ) );
+            }
+            catch ( final ArithmeticException e ) {
+                sb.append( "" );
+            }
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        return sb;
+    }
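+
+    // The table returned by createMeanBasedStatisticsPerSpeciesTable() is tab-separated;
+    // a data row might look like this (hypothetical species id and values, formatted
+    // with DF = "#.0"):
+    // SPECIES  MEAN   SD    MIN   MAX     MEDIAN
+    // MOUSE    180.3  55.1  21.0  2100.0  160.0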
+
+    private SortedMap<DomainId, DomainLengths> getDomainLengths() {
+        return _domain_lengths;
+    }
+
+    public DomainLengths getDomainLengths( final DomainId domain_id ) {
+        return getDomainLengths().get( domain_id );
+    }
+
+    public List<DomainLengths> getDomainLengthsList() {
+        final List<DomainLengths> list = new ArrayList<DomainLengths>();
+        for( final DomainLengths l : getDomainLengths().values() ) {
+            list.add( l );
+        }
+        return list;
+    }
+
+    public DescriptiveStatistics getLengthStatistic( final DomainId domain_id, final Species species ) {
+        return getDomainLengths( domain_id ).getLengthStatistic( species );
+    }
+
+    public List<Species> getSpecies() {
+        return _species;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java b/forester/java/src/org/forester/surfacing/DomainParsimonyCalculator.java
new file mode 100644 (file)
index 0000000..82de445
--- /dev/null
@@ -0,0 +1,744 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
+import org.forester.evoinference.parsimony.DolloParsimony;
+import org.forester.evoinference.parsimony.FitchParsimony;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.BinaryCharacters;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
+import org.forester.util.ForesterUtil;
+
+public final class DomainParsimonyCalculator {
+
+    private static final String                     TYPE_FORBINARY_CHARACTERS = "parsimony inferred";
+    private CharacterStateMatrix<GainLossStates>    _gain_loss_matrix;
+    private CharacterStateMatrix<BinaryStates>      _binary_internal_states_matrix;
+    private final List<GenomeWideCombinableDomains> _gwcd_list;
+    private final Phylogeny                         _phylogeny;
+    private int                                     _total_losses;
+    private int                                     _total_gains;
+    private int                                     _total_unchanged;
+    private int                                     _cost;
+    private Map<DomainId, Set<String>>              _domain_id_to_secondary_features_map;
+    private SortedSet<DomainId>                     _positive_filter;
+
+    private DomainParsimonyCalculator( final Phylogeny phylogeny ) {
+        init();
+        _phylogeny = phylogeny;
+        _gwcd_list = null;
+    }
+
+    private DomainParsimonyCalculator( final Phylogeny phylogeny, final List<GenomeWideCombinableDomains> gwcd_list ) {
+        init();
+        _phylogeny = phylogeny;
+        _gwcd_list = gwcd_list;
+    }
+
+    private DomainParsimonyCalculator( final Phylogeny phylogeny,
+                                       final List<GenomeWideCombinableDomains> gwcd_list,
+                                       final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
+        init();
+        _phylogeny = phylogeny;
+        _gwcd_list = gwcd_list;
+        setDomainIdToSecondaryFeaturesMap( domain_id_to_secondary_features_map );
+    }
+
+    int calculateNumberOfBinaryDomainCombination() {
+        if ( getGenomeWideCombinableDomainsList().isEmpty() ) {
+            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+        }
+        final Set<BinaryDomainCombination> all_binary_combinations = new HashSet<BinaryDomainCombination>();
+        for( final GenomeWideCombinableDomains gwcd : getGenomeWideCombinableDomainsList() ) {
+            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+                all_binary_combinations.add( bc );
+            }
+        }
+        return all_binary_combinations.size();
+    }
+
+    CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence() {
+        return createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
+    }
+
+    CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence() {
+        return createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList(), getPositiveFilter() );
+    }
+
+    CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final Map<Species, MappingResults> mapping_results_map ) {
+        return createMatrixOfSecondaryFeaturePresenceOrAbsence( getGenomeWideCombinableDomainsList(),
+                                                                getDomainIdToSecondaryFeaturesMap(),
+                                                                mapping_results_map );
+    }
+
+    Phylogeny decoratePhylogenyWithDomains( final Phylogeny phylogeny ) {
+        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            final String node_identifier = node.getName();
+            final BinaryCharacters bc = new BinaryCharacters( getUnitsOnNode( node_identifier ),
+                                                              getUnitsGainedOnNode( node_identifier ),
+                                                              getUnitsLostOnNode( node_identifier ),
+                                                              TYPE_FORBINARY_CHARACTERS,
+                                                              getSumOfPresentOnNode( node_identifier ),
+                                                              getSumOfGainsOnNode( node_identifier ),
+                                                              getSumOfLossesOnNode( node_identifier ) );
+            node.getNodeData().setBinaryCharacters( bc );
+        }
+        return phylogeny;
+    }
+
+    private void executeDolloParsimony( final boolean on_domain_presence ) {
+        reset();
+        final DolloParsimony dollo = DolloParsimony.createInstance();
+        dollo.setReturnGainLossMatrix( true );
+        dollo.setReturnInternalStates( true );
+        CharacterStateMatrix<BinaryStates> states = null;
+        if ( on_domain_presence ) {
+            states = createMatrixOfDomainPresenceOrAbsence();
+        }
+        else {
+            states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence();
+        }
+        dollo.execute( getPhylogeny(), states );
+        setGainLossMatrix( dollo.getGainLossMatrix() );
+        setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
+        setCost( dollo.getCost() );
+        setTotalGains( dollo.getTotalGains() );
+        setTotalLosses( dollo.getTotalLosses() );
+        setTotalUnchanged( dollo.getTotalUnchanged() );
+    }
+
+    public void executeDolloParsimonyOnBinaryDomainCombintionPresence() {
+        executeDolloParsimony( false );
+    }
+
+    public void executeDolloParsimonyOnDomainPresence() {
+        executeDolloParsimony( true );
+    }
+
+    public void executeDolloParsimonyOnDomainPresence( final SortedSet<DomainId> positive_filter ) {
+        setPositiveFilter( positive_filter );
+        executeDolloParsimony( true );
+        setPositiveFilter( null );
+    }
+
+    public void executeDolloParsimonyOnSecondaryFeatures( final Map<Species, MappingResults> mapping_results_map ) {
+        if ( getDomainIdToSecondaryFeaturesMap() == null ) {
+            throw new RuntimeException( "Domain id to secondary features map has apparently not been set" );
+        }
+        reset();
+        final DolloParsimony dollo = DolloParsimony.createInstance();
+        dollo.setReturnGainLossMatrix( true );
+        dollo.setReturnInternalStates( true );
+        final CharacterStateMatrix<BinaryStates> states = createMatrixOfSecondaryFeaturePresenceOrAbsence( mapping_results_map );
+        dollo.execute( getPhylogeny(), states );
+        setGainLossMatrix( dollo.getGainLossMatrix() );
+        setBinaryInternalStatesMatrix( dollo.getInternalStatesMatrix() );
+        setCost( dollo.getCost() );
+        setTotalGains( dollo.getTotalGains() );
+        setTotalLosses( dollo.getTotalLosses() );
+        setTotalUnchanged( dollo.getTotalUnchanged() );
+    }
+
+    private void executeFitchParsimony( final boolean on_domain_presence,
+                                        final boolean use_last,
+                                        final boolean randomize,
+                                        final long random_number_seed ) {
+        reset();
+        if ( use_last ) {
+            System.out.println( "   Fitch parsimony: use_last = true" );
+        }
+        final FitchParsimony<BinaryStates> fitch = new FitchParsimony<BinaryStates>();
+        fitch.setRandomize( randomize );
+        if ( randomize ) {
+            fitch.setRandomNumberSeed( random_number_seed );
+        }
+        fitch.setUseLast( use_last );
+        fitch.setReturnGainLossMatrix( true );
+        fitch.setReturnInternalStates( true );
+        CharacterStateMatrix<BinaryStates> states = null;
+        if ( on_domain_presence ) {
+            states = createMatrixOfDomainPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
+        }
+        else {
+            states = createMatrixOfBinaryDomainCombinationPresenceOrAbsence( getGenomeWideCombinableDomainsList() );
+        }
+        fitch.execute( getPhylogeny(), states );
+        setGainLossMatrix( fitch.getGainLossMatrix() );
+        setBinaryInternalStatesMatrix( fitch.getInternalStatesMatrix() );
+        setCost( fitch.getCost() );
+        setTotalGains( fitch.getTotalGains() );
+        setTotalLosses( fitch.getTotalLosses() );
+        setTotalUnchanged( fitch.getTotalUnchanged() );
+    }
+
+    public void executeFitchParsimonyOnBinaryDomainCombintion( final boolean use_last ) {
+        executeFitchParsimony( false, use_last, false, 0 );
+    }
+
+    public void executeFitchParsimonyOnBinaryDomainCombintion( final long random_number_seed ) {
+        executeFitchParsimony( false, false, true, random_number_seed );
+    }
+
+    public void executeFitchParsimonyOnDomainPresence( final boolean use_last ) {
+        executeFitchParsimony( true, use_last, false, 0 );
+    }
+
+    public void executeFitchParsimonyOnDomainPresence( final long random_number_seed ) {
+        executeFitchParsimony( true, false, true, random_number_seed );
+    }
+
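+    /**
+     * Derives gain-loss states directly from a user supplied binary states
+     * matrix (one row per tree node, one column per character): each node's
+     * state is compared to the state of its parent and recorded as GAIN, LOSS,
+     * UNCHANGED_PRESENT or UNCHANGED_ABSENT (the root is always counted as
+     * unchanged). Total gains, losses, unchanged counts and the cost (gains
+     * plus losses) are updated accordingly.
+     */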
+    public void executeOnGivenBinaryStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix,
+                                                  final String[] character_labels ) {
+        reset();
+        if ( binary_states_matrix.getNumberOfCharacters() != character_labels.length ) {
+            throw new IllegalArgumentException( "binary states matrix number of characters is not equal to the number of character labels provided" );
+        }
+        if ( binary_states_matrix.getNumberOfIdentifiers() != getPhylogeny().getNumberOfBranches() ) {
+            throw new IllegalArgumentException( "binary states matrix number of identifiers is not equal to the number of tree nodes provided" );
+        }
+        final CharacterStateMatrix<GainLossStates> gl_matrix = new BasicCharacterStateMatrix<GainLossStates>( binary_states_matrix
+                                                                                                                      .getNumberOfIdentifiers(),
+                                                                                                              binary_states_matrix
+                                                                                                                      .getNumberOfCharacters() );
+        int total_gains = 0;
+        int total_losses = 0;
+        int total_unchanged = 0;
+        int i = 0;
+        for( final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder(); it.hasNext(); ) {
+            gl_matrix.setIdentifier( i++, it.next().getName() );
+        }
+        for( int c = 0; c < character_labels.length; ++c ) {
+            gl_matrix.setCharacter( c, character_labels[ c ] );
+            final PhylogenyNodeIterator it = getPhylogeny().iteratorPostorder();
+            while ( it.hasNext() ) {
+                final PhylogenyNode node = it.next();
+                final String name = node.getName();
+                final BinaryStates bin_state = binary_states_matrix.getState( binary_states_matrix
+                        .getIdentifierIndex( name ), c );
+                final PhylogenyNode parent_node = getPhylogeny().getNode( name ).getParent();
+                GainLossStates gl_state = null;
+                if ( node.isRoot() ) {
+                    ++total_unchanged;
+                    if ( bin_state == BinaryStates.ABSENT ) {
+                        gl_state = GainLossStates.UNCHANGED_ABSENT;
+                    }
+                    else {
+                        gl_state = GainLossStates.UNCHANGED_PRESENT;
+                    }
+                }
+                else {
+                    final BinaryStates parent_bin_state = binary_states_matrix.getState( binary_states_matrix
+                            .getIdentifierIndex( parent_node.getName() ), c );
+                    if ( bin_state == BinaryStates.ABSENT ) {
+                        if ( parent_bin_state == BinaryStates.ABSENT ) {
+                            ++total_unchanged;
+                            gl_state = GainLossStates.UNCHANGED_ABSENT;
+                        }
+                        else {
+                            ++total_losses;
+                            gl_state = GainLossStates.LOSS;
+                        }
+                    }
+                    else {
+                        if ( parent_bin_state == BinaryStates.ABSENT ) {
+                            ++total_gains;
+                            gl_state = GainLossStates.GAIN;
+                        }
+                        else {
+                            ++total_unchanged;
+                            gl_state = GainLossStates.UNCHANGED_PRESENT;
+                        }
+                    }
+                }
+                gl_matrix.setState( name, c, gl_state );
+            }
+        }
+        setTotalGains( total_gains );
+        setTotalLosses( total_losses );
+        setTotalUnchanged( total_unchanged );
+        setCost( total_gains + total_losses );
+        setGainLossMatrix( gl_matrix );
+    }
+
+    public int getCost() {
+        return _cost;
+    }
+
+    private Map<DomainId, Set<String>> getDomainIdToSecondaryFeaturesMap() {
+        return _domain_id_to_secondary_features_map;
+    }
+
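+    /**
+     * Summarizes the current gain-loss matrix into a three column matrix with
+     * one row per node: "GAINS", "LOSSES" and "NET" (gains minus losses).
+     */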
+    public CharacterStateMatrix<Integer> getGainLossCountsMatrix() {
+        final CharacterStateMatrix<Integer> matrix = new BasicCharacterStateMatrix<Integer>( getGainLossMatrix()
+                .getNumberOfIdentifiers(), 3 );
+        for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
+            matrix.setIdentifier( i, getGainLossMatrix().getIdentifier( i ) );
+        }
+        matrix.setCharacter( 0, "GAINS" );
+        matrix.setCharacter( 1, "LOSSES" );
+        matrix.setCharacter( 2, "NET" );
+        for( int i = 0; i < getGainLossMatrix().getNumberOfIdentifiers(); ++i ) {
+            int gains = 0;
+            int losses = 0;
+            for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
+                final GainLossStates s = getGainLossMatrix().getState( i, c );
+                if ( s == GainLossStates.GAIN ) {
+                    ++gains;
+                }
+                else if ( s == GainLossStates.LOSS ) {
+                    ++losses;
+                }
+            }
+            matrix.setState( i, 0, gains );
+            matrix.setState( i, 1, losses );
+            matrix.setState( i, 2, gains - losses );
+        }
+        return matrix;
+    }
+
+    public CharacterStateMatrix<GainLossStates> getGainLossMatrix() {
+        return _gain_loss_matrix;
+    }
+
+    private List<GenomeWideCombinableDomains> getGenomeWideCombinableDomainsList() {
+        return _gwcd_list;
+    }
+
+    public CharacterStateMatrix<BinaryStates> getInternalStatesMatrix() {
+        return _binary_internal_states_matrix;
+    }
+
+    public int getNetGainsOnNode( final String node_identifier ) {
+        if ( getGainLossMatrix() == null ) {
+            throw new RuntimeException( "no gain loss matrix has been calculated" );
+        }
+        int net = 0;
+        final int id_index = getGainLossMatrix().getIdentifierIndex( node_identifier );
+        for( int c = 0; c < getGainLossMatrix().getNumberOfCharacters(); ++c ) {
+            if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.GAIN ) {
+                ++net;
+            }
+            else if ( getGainLossMatrix().getState( id_index, c ) == GainLossStates.LOSS ) {
+                --net;
+            }
+        }
+        return net;
+    }
+
+    private Phylogeny getPhylogeny() {
+        return _phylogeny;
+    }
+
+    private SortedSet<DomainId> getPositiveFilter() {
+        return _positive_filter;
+    }
+
+    public int getSumOfGainsOnNode( final String node_identifier ) {
+        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
+    }
+
+    public int getSumOfLossesOnNode( final String node_identifier ) {
+        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
+    }
+
+    public int getSumOfPresentOnNode( final String node_identifier ) {
+        return getSumOfGainsOnNode( node_identifier ) + getSumOfUnchangedPresentOnNode( node_identifier );
+    }
+
+    int getSumOfUnchangedAbsentOnNode( final String node_identifier ) {
+        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
+    }
+
+    int getSumOfUnchangedOnNode( final String node_identifier ) {
+        return getSumOfUnchangedPresentOnNode( node_identifier ) + getSumOfUnchangedAbsentOnNode( node_identifier );
+    }
+
+    int getSumOfUnchangedPresentOnNode( final String node_identifier ) {
+        return getStateSumDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
+    }
+
+    public int getTotalGains() {
+        return _total_gains;
+    }
+
+    public int getTotalLosses() {
+        return _total_losses;
+    }
+
+    public int getTotalUnchanged() {
+        return _total_unchanged;
+    }
+
+    public SortedSet<String> getUnitsGainedOnNode( final String node_identifier ) {
+        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.GAIN );
+    }
+
+    public SortedSet<String> getUnitsLostOnNode( final String node_identifier ) {
+        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.LOSS );
+    }
+
+    public SortedSet<String> getUnitsOnNode( final String node_identifier ) {
+        final SortedSet<String> present = getUnitsGainedOnNode( node_identifier );
+        present.addAll( getUnitsUnchangedPresentOnNode( node_identifier ) );
+        return present;
+    }
+
+    SortedSet<String> getUnitsUnchangedAbsentOnNode( final String node_identifier ) {
+        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_ABSENT );
+    }
+
+    SortedSet<String> getUnitsUnchangedPresentOnNode( final String node_identifier ) {
+        return getUnitsDeltaOnNode( node_identifier, getGainLossMatrix(), GainLossStates.UNCHANGED_PRESENT );
+    }
+
+    private void init() {
+        setDomainIdToSecondaryFeaturesMap( null );
+        setPositiveFilter( null );
+        reset();
+    }
+
+    private void reset() {
+        setGainLossMatrix( null );
+        setBinaryInternalStatesMatrix( null );
+        setCost( -1 );
+        setTotalGains( -1 );
+        setTotalLosses( -1 );
+        setTotalUnchanged( -1 );
+    }
+
+    private void setBinaryInternalStatesMatrix( final CharacterStateMatrix<BinaryStates> binary_states_matrix ) {
+        _binary_internal_states_matrix = binary_states_matrix;
+    }
+
+    private void setCost( final int cost ) {
+        _cost = cost;
+    }
+
+    private void setDomainIdToSecondaryFeaturesMap( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
+        _domain_id_to_secondary_features_map = domain_id_to_secondary_features_map;
+    }
+
+    private void setGainLossMatrix( final CharacterStateMatrix<GainLossStates> gain_loss_matrix ) {
+        _gain_loss_matrix = gain_loss_matrix;
+    }
+
+    private void setPositiveFilter( final SortedSet<DomainId> positive_filter ) {
+        _positive_filter = positive_filter;
+    }
+
+    private void setTotalGains( final int total_gains ) {
+        _total_gains = total_gains;
+    }
+
+    private void setTotalLosses( final int total_losses ) {
+        _total_losses = total_losses;
+    }
+
+    private void setTotalUnchanged( final int total_unchanged ) {
+        _total_unchanged = total_unchanged;
+    }
+
+    public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny ) {
+        return new DomainParsimonyCalculator( phylogeny );
+    }
+
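+    /**
+     * Creates a calculator for the given phylogeny and genome wide combinable
+     * domains list (one entry per external node).
+     * 
+     * <p>
+     * Illustrative usage sketch (the variable names are hypothetical, assuming a
+     * phylogeny and a matching gwcd list have already been obtained):
+     * </p>
+     * 
+     * <pre>
+     * DomainParsimonyCalculator calc = DomainParsimonyCalculator.createInstance( phylogeny, gwcd_list );
+     * calc.executeDolloParsimonyOnDomainPresence();
+     * CharacterStateMatrix&lt;GainLossStates&gt; gains_and_losses = calc.getGainLossMatrix();
+     * </pre>
+     */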
+    public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
+                                                            final List<GenomeWideCombinableDomains> gwcd_list ) {
+        if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
+            throw new IllegalArgumentException( "number of external nodes [" + phylogeny.getNumberOfExternalNodes()
+                    + "] does not equal size of genome wide combinable domains list [" + gwcd_list.size() + "]" );
+        }
+        return new DomainParsimonyCalculator( phylogeny, gwcd_list );
+    }
+
+    public static DomainParsimonyCalculator createInstance( final Phylogeny phylogeny,
+                                                            final List<GenomeWideCombinableDomains> gwcd_list,
+                                                            final Map<DomainId, Set<String>> domain_id_to_secondary_features_map ) {
+        if ( phylogeny.getNumberOfExternalNodes() != gwcd_list.size() ) {
+            throw new IllegalArgumentException( "size of external nodes does not equal size of genome wide combinable domains list" );
+        }
+        return new DomainParsimonyCalculator( phylogeny, gwcd_list, domain_id_to_secondary_features_map );
+    }
+
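+    /**
+     * Builds a binary character state matrix in which each row (identifier) is
+     * a genome, identified by its species id, and each column (character) is a
+     * binary domain combination observed in at least one genome; a cell is
+     * PRESENT if the combination occurs in that genome and ABSENT otherwise.
+     * Species ids must be unique within the list.
+     */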
+    public static CharacterStateMatrix<BinaryStates> createMatrixOfBinaryDomainCombinationPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
+        if ( gwcd_list.isEmpty() ) {
+            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+        }
+        final int number_of_identifiers = gwcd_list.size();
+        final SortedSet<BinaryDomainCombination> all_binary_combinations = new TreeSet<BinaryDomainCombination>();
+        final Set<BinaryDomainCombination>[] binary_combinations_per_genome = new HashSet[ number_of_identifiers ];
+        int identifier_index = 0;
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            binary_combinations_per_genome[ identifier_index ] = new HashSet<BinaryDomainCombination>();
+            for( final BinaryDomainCombination bc : gwcd.toBinaryDomainCombinations() ) {
+                all_binary_combinations.add( bc );
+                binary_combinations_per_genome[ identifier_index ].add( bc );
+            }
+            ++identifier_index;
+        }
+        final int number_of_characters = all_binary_combinations.size();
+        final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
+                                                                                                                                                 number_of_characters );
+        int character_index = 0;
+        for( final BinaryDomainCombination bc : all_binary_combinations ) {
+            matrix.setCharacter( character_index++, bc.toString() );
+        }
+        identifier_index = 0;
+        final Set<String> all_identifiers = new HashSet<String>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            final String species_id = gwcd.getSpecies().getSpeciesId();
+            if ( all_identifiers.contains( species_id ) ) {
+                throw new AssertionError( "species [" + species_id + "] is not unique" );
+            }
+            all_identifiers.add( species_id );
+            matrix.setIdentifier( identifier_index, species_id );
+            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+                BinaryDomainCombination bc = null;
+                if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED_ADJACTANT ) {
+                    bc = AdjactantDirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                }
+                else if ( gwcd.getDomainCombinationType() == DomainCombinationType.DIRECTED ) {
+                    bc = DirectedBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                }
+                else {
+                    bc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( ci ) );
+                }
+                if ( binary_combinations_per_genome[ identifier_index ].contains( bc ) ) {
+                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
+                }
+                else {
+                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
+                }
+            }
+            ++identifier_index;
+        }
+        return matrix;
+    }
+
+    static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list ) {
+        return createMatrixOfDomainPresenceOrAbsence( gwcd_list, null );
+    }
+
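+    /**
+     * Builds a binary character state matrix of domain presence or absence:
+     * rows are genomes (species ids), columns are domain ids. If a positive
+     * filter is given, only domains which are both in the filter and present
+     * in at least one genome become characters.
+     */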
+    public static CharacterStateMatrix<BinaryStates> createMatrixOfDomainPresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+                                                                                            final SortedSet<DomainId> positive_filter ) {
+        if ( gwcd_list.isEmpty() ) {
+            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+        }
+        if ( ( positive_filter != null ) && ( positive_filter.size() < 1 ) ) {
+            throw new IllegalArgumentException( "positive filter is empty" );
+        }
+        final int number_of_identifiers = gwcd_list.size();
+        final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            for( final DomainId domain : gwcd.getAllDomainIds() ) {
+                all_domain_ids.add( domain );
+            }
+        }
+        int number_of_characters = all_domain_ids.size();
+        if ( positive_filter != null ) {
+            // number_of_characters = positive_filter.size(); // wrong if the filter contains domains not present in any genome
+            number_of_characters = 0;
+            for( final DomainId id : all_domain_ids ) {
+                if ( positive_filter.contains( id ) ) {
+                    number_of_characters++;
+                }
+            }
+        }
+        final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
+                                                                                                                                                 number_of_characters );
+        int character_index = 0;
+        for( final DomainId id : all_domain_ids ) {
+            if ( positive_filter == null ) {
+                matrix.setCharacter( character_index++, id.getId() );
+            }
+            else {
+                if ( positive_filter.contains( id ) ) {
+                    matrix.setCharacter( character_index++, id.getId() );
+                }
+            }
+        }
+        int identifier_index = 0;
+        final Set<String> all_identifiers = new HashSet<String>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            final String species_id = gwcd.getSpecies().getSpeciesId();
+            if ( all_identifiers.contains( species_id ) ) {
+                throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
+            }
+            all_identifiers.add( species_id );
+            matrix.setIdentifier( identifier_index, species_id );
+            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+                if ( ForesterUtil.isEmpty( matrix.getCharacter( ci ) ) ) {
+                    throw new RuntimeException( "this should not have happened: problem with character #" + ci );
+                }
+                final DomainId id = new DomainId( matrix.getCharacter( ci ) );
+                if ( gwcd.contains( id ) ) {
+                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
+                }
+                else {
+                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
+                }
+            }
+            ++identifier_index;
+        }
+        return matrix;
+    }
+
+    /**
+     * Creates a presence/absence matrix of secondary features (for example, folds
+     * instead of Pfam domains), based on the given domain id to secondary features
+     * map. If a mapping results map is provided, the number of successfully and
+     * unsuccessfully mapped domains is recorded per genome.
+     * 
+     * @param gwcd_list genome wide combinable domains, one per genome
+     * @param domain_id_to_second_features_map maps domain ids to secondary features
+     * @param mapping_results_map optional map collecting per-species mapping results
+     * @return a binary character state matrix of secondary feature presence or absence
+     */
+    static CharacterStateMatrix<BinaryStates> createMatrixOfSecondaryFeaturePresenceOrAbsence( final List<GenomeWideCombinableDomains> gwcd_list,
+                                                                                               final Map<DomainId, Set<String>> domain_id_to_second_features_map,
+                                                                                               final Map<Species, MappingResults> mapping_results_map ) {
+        if ( gwcd_list.isEmpty() ) {
+            throw new IllegalArgumentException( "genome wide combinable domains list is empty" );
+        }
+        if ( ( domain_id_to_second_features_map == null ) || domain_id_to_second_features_map.isEmpty() ) {
+            throw new IllegalArgumentException( "domain id to secondary features map is null or empty" );
+        }
+        final int number_of_identifiers = gwcd_list.size();
+        final SortedSet<String> all_secondary_features = new TreeSet<String>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            int mapped = 0;
+            int not_mapped = 0;
+            for( final DomainId domain : gwcd.getAllDomainIds() ) {
+                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+                    all_secondary_features.addAll( domain_id_to_second_features_map.get( domain ) );
+                    mapped++;
+                }
+                else {
+                    not_mapped++;
+                }
+            }
+            if ( mapping_results_map != null ) {
+                final MappingResults mr = new MappingResults();
+                mr.setDescription( gwcd.getSpecies().getSpeciesId() );
+                mr.setSumOfSuccesses( mapped );
+                mr.setSumOfFailures( not_mapped );
+                mapping_results_map.put( gwcd.getSpecies(), mr );
+            }
+        }
+        final int number_of_characters = all_secondary_features.size();
+        final CharacterStateMatrix<CharacterStateMatrix.BinaryStates> matrix = new BasicCharacterStateMatrix<CharacterStateMatrix.BinaryStates>( number_of_identifiers,
+                                                                                                                                                 number_of_characters );
+        int character_index = 0;
+        for( final String second_id : all_secondary_features ) {
+            matrix.setCharacter( character_index++, second_id );
+        }
+        int identifier_index = 0;
+        final Set<String> all_identifiers = new HashSet<String>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            final String species_id = gwcd.getSpecies().getSpeciesId();
+            if ( all_identifiers.contains( species_id ) ) {
+                throw new IllegalArgumentException( "species [" + species_id + "] is not unique" );
+            }
+            all_identifiers.add( species_id );
+            matrix.setIdentifier( identifier_index, species_id );
+            final Set<String> all_second_per_gwcd = new HashSet<String>();
+            for( final DomainId domain : gwcd.getAllDomainIds() ) {
+                if ( domain_id_to_second_features_map.containsKey( domain ) ) {
+                    all_second_per_gwcd.addAll( domain_id_to_second_features_map.get( domain ) );
+                }
+            }
+            for( int ci = 0; ci < matrix.getNumberOfCharacters(); ++ci ) {
+                if ( all_second_per_gwcd.contains( matrix.getCharacter( ci ) ) ) {
+                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.PRESENT );
+                }
+                else {
+                    matrix.setState( identifier_index, ci, CharacterStateMatrix.BinaryStates.ABSENT );
+                }
+            }
+            ++identifier_index;
+        }
+        return matrix;
+    }
+
+    private static int getStateSumDeltaOnNode( final String node_identifier,
+                                               final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
+                                               final GainLossStates state ) {
+        if ( gain_loss_matrix == null ) {
+            throw new RuntimeException( "no gain loss matrix has been calculated" );
+        }
+        if ( ForesterUtil.isEmpty( node_identifier ) ) {
+            throw new IllegalArgumentException( "node identifier must not be empty" );
+        }
+        if ( gain_loss_matrix.isEmpty() ) {
+            throw new RuntimeException( "gain loss matrix is empty" );
+        }
+        int sum = 0;
+        final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
+        for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
+            if ( gain_loss_matrix.getState( id_index, c ) == state ) {
+                ++sum;
+            }
+        }
+        return sum;
+    }
+
+    private static SortedSet<String> getUnitsDeltaOnNode( final String node_identifier,
+                                                          final CharacterStateMatrix<GainLossStates> gain_loss_matrix,
+                                                          final GainLossStates state ) {
+        if ( gain_loss_matrix == null ) {
+            throw new RuntimeException( "no gain loss matrix has been calculated" );
+        }
+        if ( ForesterUtil.isEmpty( node_identifier ) ) {
+            throw new IllegalArgumentException( "node identifier must not be empty" );
+        }
+        if ( gain_loss_matrix.isEmpty() ) {
+            throw new RuntimeException( "gain loss matrix is empty" );
+        }
+        final SortedSet<String> d = new TreeSet<String>();
+        final int id_index = gain_loss_matrix.getIdentifierIndex( node_identifier );
+        for( int c = 0; c < gain_loss_matrix.getNumberOfCharacters(); ++c ) {
+            if ( gain_loss_matrix.getState( id_index, c ) == state ) {
+                if ( d.contains( gain_loss_matrix.getCharacter( c ) ) ) {
+                    throw new AssertionError( "this should not have happened: character ["
+                            + gain_loss_matrix.getCharacter( c ) + "] already in set" );
+                }
+                d.add( gain_loss_matrix.getCharacter( c ) );
+            }
+        }
+        return d;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainSimilarity.java b/forester/java/src/org/forester/surfacing/DomainSimilarity.java
new file mode 100644 (file)
index 0000000..bdf227c
--- /dev/null
@@ -0,0 +1,101 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+/*
+ * Represents a measure of similarity between two or more domains from
+ * different genomes.
+ */
+public interface DomainSimilarity extends Comparable<DomainSimilarity> {
+
+    public SortedSet<DomainId> getCombinableDomainIds( final Species species_of_combinable_domain );
+
+    public DomainId getDomainId();
+
+    /**
+     * For pairwise similarities, this should return the difference; for example,
+     * the difference in counts for copy number based features (the same as
+     * getMaximalDifferenceInCounts()), or the number of actually different domain
+     * combinations. For comparisons of more than two domains, this should return
+     * the maximal difference.
+     * 
+     * @return the (maximal) difference
+     */
+    public int getMaximalDifference();
+
+    /**
+     * For pairwise similarities, this should return the difference in counts,
+     * while for comparisons of more than two domains, this should return the
+     * maximal difference in counts.
+     * 
+     * @return the (maximal) difference in counts
+     */
+    public int getMaximalDifferenceInCounts();
+
+    public double getMaximalSimilarityScore();
+
+    public double getMeanSimilarityScore();
+
+    public double getMinimalSimilarityScore();
+
+    /**
+     * This should return the number of pairwise distances used to calculate
+     * this similarity score
+     * 
+     * @return the number of pairwise distances
+     */
+    public int getN();
+
+    public SortedSet<Species> getSpecies();
+
+    /**
+     * This should return a map which maps each Species to its
+     * SpeciesSpecificDomainSimilariyData.
+     * 
+     * @return SortedMap<Species, SpeciesSpecificDomainSimilariyData>
+     */
+    public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData();
+
+    public double getStandardDeviationOfSimilarityScore();
+
+    public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option );
+
+    public static enum DomainSimilarityScoring {
+        DOMAINS, PROTEINS, COMBINATIONS;
+    }
+
+    public static enum DomainSimilaritySortField {
+        MIN, MAX, SD, MEAN, ABS_MAX_COUNTS_DIFFERENCE, MAX_COUNTS_DIFFERENCE, MAX_DIFFERENCE, SPECIES_COUNT, DOMAIN_ID,
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/DomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/DomainSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..4a4c91f
--- /dev/null
@@ -0,0 +1,47 @@
+// $Id:
+// $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.List;
+import java.util.SortedSet;
+
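+/*
+ * Calculates genome wide domain similarities: implementations aggregate the
+ * pairwise similarities produced by a PairwiseDomainSimilarityCalculator over a
+ * list of genome wide combinable domains, optionally ignoring domains without
+ * combinations in any genome or domains specific to a single genome.
+ */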
+public interface DomainSimilarityCalculator {
+
+    public SortedSet<DomainSimilarity> calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator,
+                                                              final List<GenomeWideCombinableDomains> cdc_list,
+                                                              final boolean ignore_domains_without_combinations_in_any_genome,
+                                                              final boolean ignore_domains_specific_to_one_genome );
+
+    public static enum Detailedness {
+        BASIC, LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, PUNCTILIOUS
+    }
+
+    public static enum GoAnnotationOutput {
+        NONE, ALL
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/GenomeWideCombinableDomains.java b/forester/java/src/org/forester/surfacing/GenomeWideCombinableDomains.java
new file mode 100644 (file)
index 0000000..e1c6cc6
--- /dev/null
@@ -0,0 +1,79 @@
+// $Id:
+// $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
+import org.forester.util.DescriptiveStatistics;
+
+public interface GenomeWideCombinableDomains {
+
+    public boolean contains( DomainId key_id );
+
+    public CombinableDomains get( DomainId key_id );
+
+    public SortedMap<DomainId, CombinableDomains> getAllCombinableDomainsIds();
+
+    /**
+     * This should return all domain ids present in the genome.
+     * 
+     * @return a sorted set of domain ids
+     */
+    public SortedSet<DomainId> getAllDomainIds();
+
+    public DomainCombinationType getDomainCombinationType();
+
+    SortedSet<DomainId> getMostPromiscuosDomain();
+
+    /**
+     * This should return descriptive statistics for per-domain
+     * promiscuity in a genome.
+     * 
+     * @return descriptive statistics for per-domain promiscuity in a genome
+     */
+    public DescriptiveStatistics getPerGenomeDomainPromiscuityStatistics();
+
+    public int getSize();
+
+    public Species getSpecies();
+
+    /**
+     * This should return all binary domain combinations present in the genome.
+     * 
+     * @return a sorted set of binary domain combinations
+     */
+    public SortedSet<BinaryDomainCombination> toBinaryDomainCombinations();
+
+    public StringBuilder toStringBuilder( GenomeWideCombinableDomainsSortOrder order );
+
+    public static enum GenomeWideCombinableDomainsSortOrder {
+        ALPHABETICAL_KEY_ID, KEY_DOMAIN_PROTEINS_COUNT, KEY_DOMAIN_COUNT, COMBINATIONS_COUNT
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/MappingResults.java b/forester/java/src/org/forester/surfacing/MappingResults.java
new file mode 100644 (file)
index 0000000..8204dbc
--- /dev/null
@@ -0,0 +1,58 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.surfacing;
+
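+/*
+ * Simple record of how well the domains of one genome could be mapped to
+ * secondary features: a description (typically the species id), the number of
+ * successfully mapped domains, and the number of domains which could not be
+ * mapped.
+ */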
+public class MappingResults {
+
+    private String _description;
+    private int    _sum_of_successes;
+    private int    _sum_of_failures;
+
+    public String getDescription() {
+        return _description;
+    }
+
+    public int getSumOfFailures() {
+        return _sum_of_failures;
+    }
+
+    public int getSumOfSuccesses() {
+        return _sum_of_successes;
+    }
+
+    public void setDescription( final String description ) {
+        _description = description;
+    }
+
+    public void setSumOfFailures( final int sum_of_failures ) {
+        _sum_of_failures = sum_of_failures;
+    }
+
+    public void setSumOfSuccesses( final int sum_of_successes ) {
+        _sum_of_successes = sum_of_successes;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/PairwiseDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PairwiseDomainSimilarity.java
new file mode 100644 (file)
index 0000000..d1f67d0
--- /dev/null
@@ -0,0 +1,41 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public interface PairwiseDomainSimilarity {
+
+    /**
+     * This should return the raw (neither normalized nor absolute) difference in
+     * counts (for example, domain counts) for the two domains. It must be computed
+     * as (counts for domain 1) minus (counts for domain 2).
+     * 
+     * @return the difference in counts
+     */
+    public int getDifferenceInCounts();
+
+    public double getSimilarityScore();
+}
diff --git a/forester/java/src/org/forester/surfacing/PairwiseDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/PairwiseDomainSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..f0e04ca
--- /dev/null
@@ -0,0 +1,34 @@
+// $Id:
+// cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public interface PairwiseDomainSimilarityCalculator {
+
+    public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1,
+                                                         final CombinableDomains domains_2 );
+}
diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java
new file mode 100644 (file)
index 0000000..ae94f81
--- /dev/null
@@ -0,0 +1,353 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
+import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.go.GoId;
+import org.forester.go.GoNameSpace;
+import org.forester.go.GoTerm;
+import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+public class PairwiseGenomeComparator {
+
+    private List<DistanceMatrix> _domain_distance_scores_means;
+    private List<DistanceMatrix> _shared_domains_based_distances;
+    private List<DistanceMatrix> _shared_binary_combinations_based_distances;
+
+    //private List<HistogramData>  _histogram_datas;
+    public PairwiseGenomeComparator() {
+        init();
+    }
+
+    public List<DistanceMatrix> getDomainDistanceScoresMeans() {
+        return _domain_distance_scores_means;
+    }
+
+    //public List<HistogramData> getHistogramDatas() {
+    //    return _histogram_datas;
+    //}
+    public List<DistanceMatrix> getSharedBinaryCombinationsBasedDistances() {
+        return _shared_binary_combinations_based_distances;
+    }
+
+    public List<DistanceMatrix> getSharedDomainsBasedDistances() {
+        return _shared_domains_based_distances;
+    }
+
+    private void init() {
+        //_histogram_datas = new ArrayList<HistogramData>();
+        _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
+        _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
+        _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
+    }
+
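+    /**
+     * Performs all pairwise genome comparisons and fills three distance
+     * matrices: a mean similarity score based dissimilarity matrix, a shared
+     * domains based distance matrix and a shared binary domain combinations
+     * based distance matrix. Optionally, the per-pair domain similarities are
+     * written to files in the given output directory.
+     */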
+    public void performPairwiseComparisons( final StringBuilder html_desc,
+                                            final boolean sort_by_species_count_first,
+                                            final Detailedness detailedness,
+                                            final boolean ignore_domains_without_combs_in_all_spec,
+                                            final boolean ignore_domains_specific_to_one_species,
+                                            final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
+                                            final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+                                            final DomainSimilarity.DomainSimilarityScoring scoring,
+                                            final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                            final Map<GoId, GoTerm> go_id_to_term_map,
+                                            final GoNameSpace go_namespace_limit,
+                                            final Species[] species,
+                                            final int number_of_genomes,
+                                            final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains,
+                                            final PairwiseDomainSimilarityCalculator pw_calc,
+                                            final String automated_pairwise_comparison_suffix,
+                                            final boolean verbose,
+                                            final String automated_pairwise_comparison_prefix,
+                                            final String command_line_prg_name,
+                                            final boolean display_histograms,
+                                            final File out_dir,
+                                            final boolean write_pairwise_comparisons ) {
+        init();
+        final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+        final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+        final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+        if ( verbose ) {
+            System.out.println();
+            System.out.println( "Pairwise genome distances:" );
+            System.out.print( "[species-i - species-j:" );
+            System.out.print( " mean-score-based" );
+            System.out.print( " (sd)" );
+            System.out.print( " [N]" );
+            System.out.print( " | shared-domains-based" );
+            System.out.println( " | shared-binary-combinations-based]" );
+            System.out.println();
+        }
+        for( int i = 0; i < number_of_genomes; ++i ) {
+            final String species_i = species[ i ].getSpeciesId();
+            domain_distance_scores_means.setIdentifier( i, species_i );
+            shared_domains_based_distances.setIdentifier( i, species_i );
+            shared_binary_combinations_based_distances.setIdentifier( i, species_i );
+            if ( verbose ) {
+                System.out.println( ( i + 1 ) + "/" + number_of_genomes );
+            }
+            for( int j = 0; j < i; ++j ) {
+                if ( ( list_of_genome_wide_combinable_domains.get( i ).getSize() < 1 )
+                        || ( list_of_genome_wide_combinable_domains.get( j ).getSize() < 1 ) ) {
+                    domain_distance_scores_means
+                            .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+                    shared_domains_based_distances
+                            .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+                    shared_binary_combinations_based_distances
+                            .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+                    continue;
+                }
+                final List<GenomeWideCombinableDomains> genome_pair = new ArrayList<GenomeWideCombinableDomains>( 2 );
+                genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) );
+                genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) );
+                DomainSimilarityCalculator.GoAnnotationOutput go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.NONE;
+                if ( domain_id_to_go_ids_map != null ) {
+                    go_annotation_output = DomainSimilarityCalculator.GoAnnotationOutput.ALL;
+                }
+                final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
+                                                                                             sort_by_species_count_first,
+                                                                                             true );
+                final SortedSet<DomainSimilarity> similarities = calc
+                        .calculateSimilarities( pw_calc,
+                                                genome_pair,
+                                                ignore_domains_without_combs_in_all_spec,
+                                                ignore_domains_specific_to_one_species );
+                SurfacingUtil.decoratePrintableDomainSimilarities( similarities,
+                                                                   detailedness,
+                                                                   go_annotation_output,
+                                                                   go_id_to_term_map,
+                                                                   go_namespace_limit );
+                final DescriptiveStatistics stats = SurfacingUtil
+                        .calculateDescriptiveStatisticsForMeanValues( similarities );
+                final String species_j = species[ j ].getSpeciesId();
+                final DomainArchitectureBasedGenomeSimilarityCalculator genome_similarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains
+                                                                                                                                                                      .get( i ),
+                                                                                                                                                              list_of_genome_wide_combinable_domains
+                                                                                                                                                                      .get( j ) );
+                genome_similarity_calculator.setAllowDomainsToBeIgnored( false );
+                // TODO make histos for these 5 values
+                double dissimilarity_score_mean;
+                if ( stats.getN() < 1 ) {
+                    // No domains in common
+                    dissimilarity_score_mean = 1.0;
+                }
+                else {
+                    dissimilarity_score_mean = 1.0 - stats.arithmeticMean();
+                }
+                final double shared_domains_based_genome_distance = 1.0 - genome_similarity_calculator
+                        .calculateSharedDomainsBasedGenomeSimilarityScore();
+                final double shared_binary_combinations_based_genome_distance = 1.0 - genome_similarity_calculator
+                        .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore();
+                domain_distance_scores_means.setValue( i, j, dissimilarity_score_mean );
+                shared_domains_based_distances.setValue( i, j, shared_domains_based_genome_distance );
+                shared_binary_combinations_based_distances.setValue( i,
+                                                                     j,
+                                                                     shared_binary_combinations_based_genome_distance );
+                if ( verbose ) {
+                    System.out.print( species_i + "-" );
+                    System.out.print( species_j + ": " );
+                    System.out.print( ForesterUtil.round( dissimilarity_score_mean, 2 ) );
+                    if ( stats.getN() > 1 ) {
+                        System.out.print( " (" + ForesterUtil.round( stats.sampleStandardDeviation(), 2 ) + ")" );
+                    }
+                    else {
+                        System.out.print( " (n/a)" );
+                    }
+                    System.out.print( " [" + stats.getN() + "]" );
+                    System.out.print( " | " );
+                    System.out.print( ForesterUtil.round( shared_domains_based_genome_distance, 2 ) );
+                    System.out.print( " | " );
+                    System.out.println( ForesterUtil.round( shared_binary_combinations_based_genome_distance, 2 ) );
+                }
+                String pairwise_similarities_output_file_str = automated_pairwise_comparison_prefix + species_i + "_"
+                        + species_j + automated_pairwise_comparison_suffix;
+                switch ( domain_similarity_print_option ) {
+                    case HTML:
+                        if ( !pairwise_similarities_output_file_str.endsWith( ".html" ) ) {
+                            pairwise_similarities_output_file_str += ".html";
+                        }
+                        break;
+                }
+                DescriptiveStatistics pw_stats = null;
+                if ( write_pairwise_comparisons ) {
+                    try {
+                        final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str
+                                : out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
+                        pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+                                                                                new StringBuilder( species_i + "-"
+                                                                                        + species_j ),
+                                                                                writer,
+                                                                                similarities,
+                                                                                true,
+                                                                                null,
+                                                                                domain_similarity_print_option,
+                                                                                domain_similarity_sort_field,
+                                                                                scoring,
+                                                                                false );
+                    }
+                    catch ( final IOException e ) {
+                        ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarities to: \""
+                                + pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" );
+                    }
+                }
+                // pairwise_matrix.setValue( i, j, cdc_list.get( cdc_list.size()
+                // - 1 ) );
+                if ( pw_stats != null ) {
+                    if ( pw_stats.getMin() >= pw_stats.getMax() ) {
+                        ForesterUtil.printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j
+                                + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is ["
+                                + pw_stats.getMax() + "], possibly indicating that a genome is compared to itself" );
+                    }
+                    if ( display_histograms && ( pw_stats.getMin() < pw_stats.getMax() ) ) {
+                        //final double[] values = pw_stats.getDataAsDoubleArray();
+                        // List<HistogramDataItem> data_items = new
+                        // ArrayList<HistogramDataItem>( values.length );
+                        // for( int n = 0; n < values.length; i++ ) {
+                        // data_items.add( new BasicHistogramDataItem( "", values[ n ] )
+                        // );
+                        // }
+                        //~   _histogram_datas.add( new HistogramData( species_i + "-" + species_j, values, null, 20 ) );
+                    }
+                }
+            }
+        }
+        getDomainDistanceScoresMeans().add( domain_distance_scores_means );
+        getSharedDomainsBasedDistances().add( shared_domains_based_distances );
+        getSharedBinaryCombinationsBasedDistances().add( shared_binary_combinations_based_distances );
+        if ( verbose ) {
+            System.out.println();
+        }
+    }
+
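+    // Jackknife-style resampling sketch of the method below: for each of the 'number_of_resamplings'
+    // rounds, roundToInt( jacknife_ratio * number of unique domain ids ) domain ids are picked at
+    // random and ignored, and the shared-domain and shared-binary-combination based genome distances
+    // are recomputed on the remaining domains. For example (hypothetical numbers), with 1000 unique
+    // domain ids and a jacknife_ratio of 0.5, 500 ids are ignored in each resampling round.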
+    public void performPairwiseComparisonsJacknifed( final Species[] species,
+                                                     final int number_of_genomes,
+                                                     final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains,
+                                                     final boolean verbose,
+                                                     final int number_of_resamplings,
+                                                     final double jacknife_ratio,
+                                                     final long random_seed ) {
+        init();
+        if ( number_of_resamplings < 2 ) {
+            throw new IllegalArgumentException( "attempt to perform jacknife resampling with less than 2 resamplings" );
+        }
+        if ( jacknife_ratio <= 0.0 ) {
+            throw new IllegalArgumentException( "attempt to perform jacknife resampling with jacknife ratio of 0.0 or less" );
+        }
+        else if ( jacknife_ratio >= 1.0 ) {
+            throw new IllegalArgumentException( "attempt to perform jacknife resampling with jacknife ratio 1.0 or more" );
+        }
+        final DomainId[] all_unique_domain_ids = getAllUniqueDomainIdAsArray( list_of_genome_wide_combinable_domains );
+        if ( verbose ) {
+            System.out.println();
+            System.out.println( "Jacknife: total number of domains: " + all_unique_domain_ids.length );
+        }
+        if ( verbose ) {
+            System.out.print( "resampling " );
+        }
+        final Random generator = new Random( random_seed );
+        for( int r = 0; r < number_of_resamplings; ++r ) {
+            if ( verbose ) {
+                System.out.print( " " + r );
+            }
+            final SortedSet<DomainId> domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids,
+                                                                                    jacknife_ratio,
+                                                                                    generator );
+            final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+            final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+            for( int i = 0; i < number_of_genomes; ++i ) {
+                final String species_i = species[ i ].getSpeciesId();
+                shared_domains_based_distances.setIdentifier( i, species_i );
+                shared_binary_combinations_based_distances.setIdentifier( i, species_i );
+                for( int j = 0; j < i; ++j ) {
+                    final List<GenomeWideCombinableDomains> genome_pair = new ArrayList<GenomeWideCombinableDomains>( 2 );
+                    genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) );
+                    genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) );
+                    final DomainArchitectureBasedGenomeSimilarityCalculator genome_similarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains
+                                                                                                                                                                          .get( i ),
+                                                                                                                                                                  list_of_genome_wide_combinable_domains
+                                                                                                                                                                          .get( j ) );
+                    genome_similarity_calculator.setAllowDomainsToBeIgnored( true );
+                    genome_similarity_calculator.setDomainIdsToIgnore( domain_ids_to_ignore );
+                    shared_domains_based_distances.setValue( i, j, 1.0 - genome_similarity_calculator
+                            .calculateSharedDomainsBasedGenomeSimilarityScore() );
+                    shared_binary_combinations_based_distances.setValue( i, j, 1.0 - genome_similarity_calculator
+                            .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() );
+                }
+            }
+            getSharedDomainsBasedDistances().add( shared_domains_based_distances );
+            getSharedBinaryCombinationsBasedDistances().add( shared_binary_combinations_based_distances );
+        }
+        if ( verbose ) {
+            System.out.println();
+        }
+    }
+
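+    // Collects the union of all domain ids over all genomes into a sorted array
+    // (each domain id appears exactly once).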
+    static private DomainId[] getAllUniqueDomainIdAsArray( final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains ) {
+        DomainId[] all_domain_ids_array;
+        final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
+        for( final GenomeWideCombinableDomains genome_wide_combinable_domains : list_of_genome_wide_combinable_domains ) {
+            final SortedSet<DomainId> all_domains = genome_wide_combinable_domains.getAllDomainIds();
+            for( final DomainId domain : all_domains ) {
+                all_domain_ids.add( domain );
+            }
+        }
+        all_domain_ids_array = new DomainId[ all_domain_ids.size() ];
+        int n = 0;
+        for( final DomainId domain_id : all_domain_ids ) {
+            all_domain_ids_array[ n++ ] = domain_id;
+        }
+        return all_domain_ids_array;
+    }
+
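+    // Picks roundToInt( jacknife_ratio * size ) distinct domain ids uniformly at random;
+    // because a SortedSet is used, ids drawn more than once are simply retried until the
+    // target count of distinct ids is reached.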
+    static private SortedSet<DomainId> randomlyPickDomainIds( final DomainId[] all_domain_ids_array,
+                                                              final double jacknife_ratio,
+                                                              final Random generator ) {
+        final int size = all_domain_ids_array.length;
+        final SortedSet<DomainId> random_domain_ids = new TreeSet<DomainId>();
+        final int number_of_ids_pick = ForesterUtil.roundToInt( jacknife_ratio * size );
+        while ( random_domain_ids.size() < number_of_ids_pick ) {
+            final int r = generator.nextInt( size );
+            random_domain_ids.add( all_domain_ids_array[ r ] );
+        }
+        return random_domain_ids;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java
new file mode 100644 (file)
index 0000000..0fb075e
--- /dev/null
@@ -0,0 +1,717 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.go.GoId;
+import org.forester.go.GoNameSpace;
+import org.forester.go.GoTerm;
+import org.forester.go.GoXRef;
+import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
+import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
+import org.forester.util.ForesterUtil;
+
+public class PrintableDomainSimilarity implements DomainSimilarity {
+
+    final public static String                                           SPECIES_SEPARATOR = "  ";
+    final private static char                                            TAB               = '\t';
+    final private static int                                             BEFORE            = -1;
+    final private static int                                             EQUAL             = 0;
+    final private static int                                             AFTER             = 1;
+    final private static String                                          NO_SPECIES        = "     ";
+    final private double                                                 _min;
+    final private double                                                 _max;
+    final private double                                                 _mean;
+    final private double                                                 _sd;
+    final private int                                                    _n;
+    private final int                                                    _max_difference_in_counts;
+    private final int                                                    _max_difference;
+    private DomainSimilarityCalculator.GoAnnotationOutput                _go_annotation_output;
+    final private CombinableDomains                                      _combinable_domains;
+    final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
+    final private DomainSimilaritySortField                              _sort_field;
+    private List<Species>                                                _species_order;
+    private final boolean                                                _sort_by_species_count_first;
+    private DomainSimilarityCalculator.Detailedness                      _detailedness;
+    private Map<GoId, GoTerm>                                            _go_id_to_term_map;
+    private GoNameSpace                                                  _go_namespace_limit;
+    private final boolean                                                _treat_as_binary_comparison;
+
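+    // Construction sketch for the constructor below (hypothetical values; "cd" is assumed to be a
+    // populated CombinableDomains and "species_data" a map with data for two species, so n must be 1):
+    //
+    //   new PrintableDomainSimilarity( cd, 0.1, 0.9, 0.5, 0.5, 0.2, 1, 3, 2, species_data,
+    //                                  DomainSimilaritySortField.MEAN, false, true );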
+    /**
+     * If go_id_to_term_map is not null, detailed GO information is written;
+     * otherwise only GO ids are written.
+     */
+    public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
+                                      final double min,
+                                      final double max,
+                                      final double mean,
+                                      final double median,
+                                      final double sd,
+                                      final int n,
+                                      final int max_difference_in_counts,
+                                      final int max_difference,
+                                      final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
+                                      final DomainSimilaritySortField sort_field,
+                                      final boolean sort_by_species_count_first,
+                                      final boolean treat_as_binary_comparison ) {
+        if ( combinable_domains == null ) {
+            throw new IllegalArgumentException( "attempt to use null combinable domains" );
+        }
+        if ( sort_field == null ) {
+            throw new IllegalArgumentException( "attempt to use null sorting" );
+        }
+        if ( species_data == null ) {
+            throw new IllegalArgumentException( "attempt to use null species data" );
+        }
+        if ( species_data.size() < 1 ) {
+            throw new IllegalArgumentException( "attempt to use empty species data" );
+        }
+        if ( n < 0 ) {
+            throw new IllegalArgumentException( "attempt to use N less than 0" );
+        }
+        if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
+            throw new IllegalArgumentException( "attempt to use N less than 1" );
+        }
+        if ( sd < 0.0 ) {
+            throw new IllegalArgumentException( "attempt to use negative SD" );
+        }
+        if ( max < min ) {
+            throw new IllegalArgumentException( "attempt to use max smaller than min" );
+        }
+        init();
+        _combinable_domains = combinable_domains;
+        _min = min;
+        _max = max;
+        _mean = mean;
+        _sd = sd;
+        _n = n;
+        _max_difference_in_counts = max_difference_in_counts;
+        _max_difference = max_difference;
+        _species_data = species_data;
+        _sort_field = sort_field;
+        _sort_by_species_count_first = sort_by_species_count_first;
+        _treat_as_binary_comparison = treat_as_binary_comparison;
+        final int s = species_data.size();
+        if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
+            throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
+                    + " for domain " + combinable_domains.getKeyDomain() );
+        }
+        if ( s > 2 ) {
+            if ( getMaximalDifferenceInCounts() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
+            }
+            if ( getMaximalDifference() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
+            }
+        }
+    }
+
+    private void addGoInformation( final StringBuffer sb, final boolean for_table, final boolean html ) {
+        if ( !for_table ) {
+            sb.append( "<" );
+        }
+        switch ( getGoAnnotationOutput() ) {
+            case ALL: {
+                final int go_ids = getCombinableDomains().getKeyDomain().getNumberOfGoIds();
+                boolean first = true;
+                for( int i = 0; i < go_ids; ++i ) {
+                    final GoId go_id = getCombinableDomains().getKeyDomain().getGoId( i );
+                    if ( getGoIdToTermMap() != null ) {
+                        if ( getGoIdToTermMap().containsKey( go_id ) ) {
+                            first = appendGoTerm( sb, getGoIdToTermMap().get( go_id ), first, html );
+                        }
+                        else {
+                            sb.append( "go id \"" + go_id + "\" not found ["
+                                    + getCombinableDomains().getKeyDomain().getId() + "]" );
+                        }
+                    }
+                    else {
+                        if ( !first ) {
+                            sb.append( ", " );
+                        }
+                        if ( html ) {
+                            sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id
+                                    + "\" target=\"amigo_window\">" + go_id + "</a>" );
+                        }
+                        else {
+                            sb.append( go_id );
+                        }
+                        first = false;
+                    }
+                }
+                break;
+            }
+            case NONE: {
+                break;
+            }
+            default:
+                throw new RuntimeException( "unknown " + getGoAnnotationOutput() );
+        }
+        if ( !for_table ) {
+            sb.append( ">: " );
+        }
+    }
+
+    private void addSpeciesSpecificDomainData( final StringBuffer sb, final Species species, final boolean html ) {
+        if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
+            sb.append( "[" );
+        }
+        if ( html ) {
+            sb.append( "<b>" );
+            if ( ( SurfacingConstants.TAXONOMY_LINK != null ) && ( species.getSpeciesId().length() > 2 )
+                    && ( species.getSpeciesId().length() < 6 ) ) {
+                sb.append( "<a href=\"" + SurfacingConstants.TAXONOMY_LINK + species.getSpeciesId()
+                        + "\" target=\"taxonomy_window\">" + species.getSpeciesId() + "</a>" );
+            }
+            else {
+                sb.append( species.getSpeciesId() );
+            }
+            sb.append( "</b>" );
+        }
+        else {
+            sb.append( species.getSpeciesId() );
+        }
+        if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
+            sb.append( ":" );
+            sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
+            sb.append( "]" );
+        }
+        if ( html ) {
+            sb.append( "<br>" );
+        }
+        sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
+    }
+
+    private boolean appendGoTerm( final StringBuffer sb, final GoTerm go_term, final boolean first, final boolean html ) {
+        if ( ( getGoNamespaceLimit() == null ) || getGoNamespaceLimit().equals( go_term.getGoNameSpace() ) ) {
+            if ( !first ) {
+                sb.append( ", " );
+            }
+            final GoId go_id = go_term.getGoId();
+            if ( html ) {
+                sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
+                        + "</a>" );
+            }
+            else {
+                sb.append( go_id );
+            }
+            sb.append( ":" );
+            sb.append( go_term.getName() );
+            if ( !html ) {
+                if ( getGoNamespaceLimit() == null ) {
+                    sb.append( ":" );
+                    sb.append( go_term.getGoNameSpace().toString() );
+                }
+                for( final GoXRef xref : go_term.getGoXRefs() ) {
+                    sb.append( ":" );
+                    sb.append( xref.toString() );
+                }
+            }
+            return false;
+        }
+        return true;
+    }
+
+    private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
+        if ( getSortField() == sort_field ) {
+            sb.append( "</b>" );
+        }
+    }
+
+    private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
+        if ( getSortField() == sort_field ) {
+            sb.append( "<b>" );
+        }
+    }
+
+    private int compareByDomainId( final DomainSimilarity other ) {
+        return getDomainId().compareTo( other.getDomainId() );
+    }
+
+    private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
+        final int s_this = getSpeciesData().size();
+        final int s_other = domain_similarity.getSpeciesData().size();
+        if ( s_this < s_other ) {
+            return PrintableDomainSimilarity.BEFORE;
+        }
+        else if ( s_this > s_other ) {
+            return PrintableDomainSimilarity.AFTER;
+        }
+        else {
+            return PrintableDomainSimilarity.EQUAL;
+        }
+    }
+
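+    // Ordering used by compareTo below: if sorting by species count first is enabled, the species
+    // count is compared before the configured sort field; ties in the sort field are always broken
+    // by the domain id. Note that the difference-based fields sort in descending order (larger
+    // differences first), while MIN, MAX, MEAN and SD sort in ascending order.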
+    public int compareTo( final DomainSimilarity domain_similarity ) {
+        if ( this == domain_similarity ) {
+            return PrintableDomainSimilarity.EQUAL;
+        }
+        else if ( domain_similarity == null ) {
+            throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
+        }
+        else if ( domain_similarity.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
+                    + domain_similarity.getClass() );
+        }
+        switch ( getSortField() ) {
+            case MIN:
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case MAX:
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case MEAN:
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case SD:
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( getStandardDeviationOfSimilarityScore() < domain_similarity
+                        .getStandardDeviationOfSimilarityScore() ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
+                        .getStandardDeviationOfSimilarityScore() ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case MAX_DIFFERENCE:
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case ABS_MAX_COUNTS_DIFFERENCE:
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
+                        .getMaximalDifferenceInCounts() ) ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
+                        .getMaximalDifferenceInCounts() ) ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case MAX_COUNTS_DIFFERENCE:
+                if ( getSpeciesData().size() != 2 ) {
+                    throw new RuntimeException( "attempt to sort by maximal counts difference with species count not equal to two" );
+                }
+                if ( isSortBySpeciesCountFirst() ) {
+                    final int i = compareBySpeciesCount( domain_similarity );
+                    if ( i != PrintableDomainSimilarity.EQUAL ) {
+                        return i;
+                    }
+                }
+                if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
+                    return PrintableDomainSimilarity.BEFORE;
+                }
+                else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
+                    return PrintableDomainSimilarity.AFTER;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case SPECIES_COUNT:
+                final int i = compareBySpeciesCount( domain_similarity );
+                if ( i != PrintableDomainSimilarity.EQUAL ) {
+                    return i;
+                }
+                else {
+                    return compareByDomainId( domain_similarity );
+                }
+            case DOMAIN_ID:
+                return compareByDomainId( domain_similarity );
+        }
+        throw new AssertionError( "Unknown sort method: " + getSortField() );
+    }
+
+    public SortedSet<DomainId> getCombinableDomainIds( final Species species_of_combinable_domain ) {
+        final SortedSet<DomainId> sorted_ids = new TreeSet<DomainId>();
+        if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
+            for( final DomainId id : getSpeciesData().get( species_of_combinable_domain )
+                    .getCombinableDomainIdToCountsMap().keySet() ) {
+                sorted_ids.add( id );
+            }
+        }
+        return sorted_ids;
+    }
+
+    private CombinableDomains getCombinableDomains() {
+        return _combinable_domains;
+    }
+
+    private DomainSimilarityCalculator.Detailedness getDetaildness() {
+        return _detailedness;
+    }
+
+    public DomainId getDomainId() {
+        return getCombinableDomains().getKeyDomain();
+    }
+
+    private DomainSimilarityCalculator.GoAnnotationOutput getGoAnnotationOutput() {
+        return _go_annotation_output;
+    }
+
+    private Map<GoId, GoTerm> getGoIdToTermMap() {
+        return _go_id_to_term_map;
+    }
+
+    public GoNameSpace getGoNamespaceLimit() {
+        return _go_namespace_limit;
+    }
+
+    public int getMaximalDifference() {
+        return _max_difference;
+    }
+
+    @Override
+    public int getMaximalDifferenceInCounts() {
+        return _max_difference_in_counts;
+    }
+
+    public double getMaximalSimilarityScore() {
+        return _max;
+    }
+
+    public double getMeanSimilarityScore() {
+        return _mean;
+    }
+
+    public double getMinimalSimilarityScore() {
+        return _min;
+    }
+
+    public int getN() {
+        return _n;
+    }
+
+    private DomainSimilaritySortField getSortField() {
+        return _sort_field;
+    }
+
+    public SortedSet<Species> getSpecies() {
+        final SortedSet<Species> species = new TreeSet<Species>();
+        for( final Species s : getSpeciesData().keySet() ) {
+            species.add( s );
+        }
+        return species;
+    }
+
+    public List<Species> getSpeciesCustomOrder() {
+        return _species_order;
+    }
+
+    public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData() {
+        return _species_data;
+    }
+
+    private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html ) {
+        final StringBuffer sb = new StringBuffer();
+        for( final Species species : getSpeciesData().keySet() ) {
+            addSpeciesSpecificDomainData( sb, species, html );
+        }
+        return sb;
+    }
+
+    private StringBuffer getSpeciesDataInCustomOrder( final boolean html ) {
+        final StringBuffer sb = new StringBuffer();
+        for( final Species order_species : getSpeciesCustomOrder() ) {
+            if ( getSpeciesData().keySet().contains( order_species ) ) {
+                addSpeciesSpecificDomainData( sb, order_species, html );
+            }
+            else {
+                sb.append( PrintableDomainSimilarity.NO_SPECIES );
+                sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
+            }
+        }
+        return sb;
+    }
+
+    public double getStandardDeviationOfSimilarityScore() {
+        return _sd;
+    }
+
+    private void init() {
+        _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
+        _go_annotation_output = null;
+        _go_id_to_term_map = null;
+    }
+
+    private boolean isSortBySpeciesCountFirst() {
+        return _sort_by_species_count_first;
+    }
+
+    private boolean isTreatAsBinaryComparison() {
+        return _treat_as_binary_comparison;
+    }
+
+    public void setDetailedness( final Detailedness detailedness ) {
+        _detailedness = detailedness;
+    }
+
+    public void setGoAnnotationOutput( final GoAnnotationOutput go_annotation_output ) {
+        _go_annotation_output = go_annotation_output;
+    }
+
+    public void setGoIdToTermMap( final Map<GoId, GoTerm> go_id_to_term_map ) {
+        _go_id_to_term_map = go_id_to_term_map;
+    }
+
+    public void setGoNamespaceLimit( final GoNameSpace go_namespace_limit ) {
+        _go_namespace_limit = go_namespace_limit;
+    }
+
+    public void setSpeciesOrder( final List<Species> species_order ) {
+        if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
+            throw new IllegalArgumentException( "list used to order species must contain all species of this domain similarity" );
+        }
+        _species_order = species_order;
+    }
+
+    @Override
+    public String toString() {
+        return toStringBuffer( null ).toString();
+    }
+
+    public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option ) {
+        switch ( print_option ) {
+            case SIMPLE_TAB_DELIMITED:
+                return toStringBufferSimpleTabDelimited();
+            case HTML:
+                return toStringBufferDetailedHTML();
+            default:
+                throw new AssertionError( "Unknown print option: " + print_option );
+        }
+    }
+
+    private StringBuffer toStringBufferDetailedHTML() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "<tr>" );
+        sb.append( "<td>" );
+        boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+        sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\">" + getDomainId()
+                + "</a>" );
+        boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
+        sb.append( "</td>" );
+        sb.append( "<td>" );
+        boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
+        sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+        boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
+        sb.append( "</td>" );
+        if ( !isTreatAsBinaryComparison() ) {
+            sb.append( "<td>" );
+            sb.append( "(" );
+            boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
+            sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+            boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
+            sb.append( ")" );
+            sb.append( "</td>" );
+            sb.append( "<td>" );
+            sb.append( "[" );
+            boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
+            sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+            boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
+            sb.append( "," );
+            boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
+            sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+            boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
+            sb.append( "]" );
+            sb.append( "</td>" );
+        }
+        sb.append( "<td>" );
+        boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
+        sb.append( getMaximalDifference() );
+        boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
+        sb.append( "</td>" );
+        sb.append( "<td>" );
+        if ( isTreatAsBinaryComparison() ) {
+            boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
+            boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
+            sb.append( getMaximalDifferenceInCounts() );
+            boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
+            boldEndIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
+        }
+        else {
+            boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
+            boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
+            sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
+            boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
+            boldEndIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
+        }
+        sb.append( "</td>" );
+        if ( !isTreatAsBinaryComparison() ) {
+            sb.append( "<td>" );
+            if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
+                sb.append( "<b>" );
+            }
+            sb.append( getSpeciesData().size() );
+            if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
+                sb.append( "</b>" );
+            }
+            sb.append( "</td>" );
+        }
+        if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
+            sb.append( "<td>" );
+            addGoInformation( sb, true, true );
+            sb.append( "</td>" );
+        }
+        if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
+            sb.append( "<td>" );
+            sb.append( getSpeciesDataInAlphabeticalOrder( true ) );
+            sb.append( "</td>" );
+        }
+        else {
+            sb.append( "<td>" );
+            sb.append( getSpeciesDataInCustomOrder( true ) );
+            sb.append( "</td>" );
+        }
+        sb.append( "</tr>" );
+        return sb;
+    }
+
+    private StringBuffer toStringBufferSimpleTabDelimited() {
+        final StringBuffer sb = new StringBuffer();
+        sb.append( getDomainId() );
+        switch ( getSortField() ) {
+            case MIN:
+                sb.append( TAB );
+                sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+                break;
+            case MAX:
+                sb.append( TAB );
+                sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+                break;
+            case MEAN:
+                sb.append( TAB );
+                sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+                break;
+            case SD:
+                sb.append( TAB );
+                sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+                break;
+            case MAX_DIFFERENCE:
+                sb.append( TAB );
+                sb.append( getMaximalDifference() );
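+                // no break here: when sorted by MAX_DIFFERENCE the counts difference below is appended as well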
+            case ABS_MAX_COUNTS_DIFFERENCE:
+            case MAX_COUNTS_DIFFERENCE:
+                sb.append( TAB );
+                if ( isTreatAsBinaryComparison() ) {
+                    sb.append( getMaximalDifferenceInCounts() );
+                }
+                else {
+                    sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
+                }
+                break;
+            case SPECIES_COUNT:
+                sb.append( TAB );
+                sb.append( getSpeciesData().size() );
+                break;
+            case DOMAIN_ID:
+                break;
+            default:
+                throw new AssertionError( "Unknown sort method: " + getSortField() );
+        }
+        if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
+            sb.append( TAB );
+            addGoInformation( sb, true, false );
+        }
+        return sb;
+    }
+
+    public static enum PRINT_OPTION {
+        SIMPLE_TAB_DELIMITED, HTML;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java b/forester/java/src/org/forester/surfacing/PrintableSpeciesSpecificDomainSimilariyData.java
new file mode 100644 (file)
index 0000000..9c36890
--- /dev/null
@@ -0,0 +1,141 @@
+// $Id:
+// 22:09:42 cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.util.DescriptiveStatistics;
+
+class PrintableSpeciesSpecificDomainSimilariyData implements SpeciesSpecificDomainSimilariyData {
+
+    private final static NumberFormat   FORMATTER = new DecimalFormat( "0.0E0" );
+    final SortedMap<DomainId, Integer>  _combinable_domain_id_to_count_map;
+    final private int                   _key_domain_proteins_count;
+    final private int                   _key_domain_domains_count;
+    final private int                   _combinable_domains_count;
+    final private DescriptiveStatistics _key_domain_confidence_descriptive_statistics;
+
+    public PrintableSpeciesSpecificDomainSimilariyData( final int key_domain_proteins_count,
+                                                        final int key_domain_domains_count,
+                                                        final int combinable_domains,
+                                                        final DescriptiveStatistics key_domain_confidence_descriptive_statistics ) {
+        _key_domain_proteins_count = key_domain_proteins_count;
+        _key_domain_domains_count = key_domain_domains_count;
+        _combinable_domains_count = combinable_domains;
+        _key_domain_confidence_descriptive_statistics = key_domain_confidence_descriptive_statistics;
+        _combinable_domain_id_to_count_map = new TreeMap<DomainId, Integer>();
+    }
+
+    public void addProteinsExhibitingCombinationCount( final DomainId domain_id, final int count ) {
+        if ( getCombinableDomainIdToCountsMap().containsKey( domain_id ) ) {
+            throw new IllegalArgumentException( "Domain with id " + domain_id + " already exists" );
+        }
+        getCombinableDomainIdToCountsMap().put( domain_id, count );
+    }
+
+    public SortedMap<DomainId, Integer> getCombinableDomainIdToCountsMap() {
+        return _combinable_domain_id_to_count_map;
+    }
+
+    private int getCombinableDomainsCount() {
+        return _combinable_domains_count;
+    }
+
+    private DescriptiveStatistics getKeyDomainConfidenceDescriptiveStatistics() {
+        return _key_domain_confidence_descriptive_statistics;
+    }
+
+    private int getKeyDomainDomainsCount() {
+        return _key_domain_domains_count;
+    }
+
+    private int getKeyDomainProteinsCount() {
+        return _key_domain_proteins_count;
+    }
+
+    public int getNumberOfProteinsExhibitingCombinationWith( final DomainId domain_id ) {
+        if ( !getCombinableDomainIdToCountsMap().containsKey( domain_id ) ) {
+            throw new IllegalArgumentException( "Domain with id " + domain_id + " not found" );
+        }
+        return getCombinableDomainIdToCountsMap().get( domain_id );
+    }
+
+    @Override
+    public String toString() {
+        return toStringBuffer( DomainSimilarityCalculator.Detailedness.LIST_COMBINING_DOMAIN_FOR_EACH_SPECIES, false )
+                .toString();
+    }
+
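+    // Example of the PUNCTILIOUS, non-HTML form produced below (hypothetical counts and Pfam ids);
+    // the fields are: key domain domains count, key domain proteins count, combinable domains count,
+    // mean key domain confidence, followed by the combinable domains and their counts:
+    //
+    //   " 5, 3, 2, 1.2E-30: PF00001:4, PF00002:1"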
+    public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, final boolean html ) {
+        final StringBuffer sb = new StringBuffer();
+        if ( detailedness == DomainSimilarityCalculator.Detailedness.PUNCTILIOUS ) {
+            sb.append( " " );
+            sb.append( getKeyDomainDomainsCount() );
+            sb.append( ", " );
+            sb.append( getKeyDomainProteinsCount() );
+            sb.append( ", " );
+            sb.append( getCombinableDomainsCount() );
+            sb.append( ", " );
+            if ( html ) {
+                sb.append( "<i>" );
+            }
+            sb.append( FORMATTER.format( getKeyDomainConfidenceDescriptiveStatistics().arithmeticMean() ) );
+            if ( html ) {
+                sb.append( "</i>" );
+            }
+            if ( !getCombinableDomainIdToCountsMap().isEmpty() ) {
+                sb.append( ":" );
+            }
+        }
+        final Set<DomainId> ids = getCombinableDomainIdToCountsMap().keySet();
+        int i = 0;
+        for( final DomainId domain_id : ids ) {
+            ++i;
+            sb.append( " " );
+            if ( html ) {
+                sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_id.getId() + "\">"
+                        + domain_id.getId() + "</a>" );
+            }
+            else {
+                sb.append( domain_id.getId() );
+            }
+            if ( detailedness == DomainSimilarityCalculator.Detailedness.PUNCTILIOUS ) {
+                sb.append( ":" );
+                sb.append( getCombinableDomainIdToCountsMap().get( domain_id ) );
+            }
+            if ( i < ids.size() ) {
+                sb.append( "," );
+            }
+        }
+        return sb;
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/Protein.java b/forester/java/src/org/forester/surfacing/Protein.java
new file mode 100644 (file)
index 0000000..ecfd1e8
--- /dev/null
@@ -0,0 +1,68 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.List;
+
+public interface Protein {
+
+    public void addProteinDomain( final Domain protein_domain );
+
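+    // Illustration of the contract documented below (hypothetical domain ids): a protein with
+    // domains in the order A-B-A-C contains (A, C) and (B, C) with in_nc_order = true, but not
+    // (C, A); with in_nc_order = false all three lists are contained.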
+    /**
+     * If in_nc_order is set to true, this should return true if and only if
+     * the order of the domain ids in 'domains' matches the order in which they
+     * occur in this protein (as determined by the start positions of this
+     * protein's domains, _not_ by their index); interspersed 'other' domains
+     * in this protein are ignored.
+     * If in_nc_order is set to false, this should return true if and only if
+     * this protein contains all domains listed in 'domains' (order and count
+     * do not matter).
+     * 
+     * @param domains a list of domain ids in a certain order
+     * @param in_nc_order whether to take the order into account
+     * @return true if and only if this protein contains the given domains as described above
+     */
+    public boolean contains( final List<DomainId> domains, final boolean in_nc_order );
+
+    public String getAccession();
+
+    public String getDescription();
+
+    public String getName();
+
+    public int getNumberOfProteinDomains();
+
+    public Domain getProteinDomain( final int index );
+
+    public int getProteinDomainCount( final DomainId domain_id );
+
+    public List<Domain> getProteinDomains();
+
+    public List<Domain> getProteinDomains( final DomainId domain_id );
+
+    public ProteinId getProteinId();
+
+    public Species getSpecies();
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/ProteinCountsBasedPairwiseDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/ProteinCountsBasedPairwiseDomainSimilarityCalculator.java
new file mode 100644 (file)
index 0000000..d61252c
--- /dev/null
@@ -0,0 +1,41 @@
+// $Id:
+// 22:05:28 cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public class ProteinCountsBasedPairwiseDomainSimilarityCalculator implements PairwiseDomainSimilarityCalculator {
+
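+    // Feeds the signed difference and the sum of the key-domain protein counts of the two
+    // genomes into CountsBasedPairwiseDomainSimilarity; e.g. (hypothetical counts) protein
+    // counts of 4 and 1 yield a difference of 3 and a sum of 5.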
+    public PairwiseDomainSimilarity calculateSimilarity( final CombinableDomains domains_1,
+                                                         final CombinableDomains domains_2 ) {
+        if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
+            throw new IllegalArgumentException( "attempt to calculate similarity between domain collections with different keys" );
+        }
+        final int pc1 = domains_1.getKeyDomainProteinsCount();
+        final int pc2 = domains_2.getKeyDomainProteinsCount();
+        return new CountsBasedPairwiseDomainSimilarity( pc1 - pc2, pc1 + pc2 );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/ProteinId.java b/forester/java/src/org/forester/surfacing/ProteinId.java
new file mode 100644 (file)
index 0000000..afaf661
--- /dev/null
@@ -0,0 +1,80 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.util.ForesterUtil;
+
+public class ProteinId implements Comparable<ProteinId> {
+
+    final private String _id;
+
+    public ProteinId( final String id ) {
+        if ( ForesterUtil.isEmpty( id ) ) {
+            throw new IllegalArgumentException( "attempt to create new protein id from empty or null string" );
+        }
+        _id = id.trim();
+    }
+
+    @Override
+    public int compareTo( final ProteinId protein_id ) {
+        if ( this == protein_id ) {
+            return 0;
+        }
+        return getId().toLowerCase().compareTo( protein_id.getId().toLowerCase() );
+    }
+
+    @Override
+    public boolean equals( final Object o ) {
+        if ( this == o ) {
+            return true;
+        }
+        else if ( o == null ) {
+            throw new IllegalArgumentException( "attempt to check protein id equality to null" );
+        }
+        else if ( o.getClass() != this.getClass() ) {
+            throw new IllegalArgumentException( "attempt to check protein id equality to " + o + " [" + o.getClass()
+                    + "]" );
+        }
+        else {
+            return getId().equals( ( ( ProteinId ) o ).getId() );
+        }
+    }
+
+    public String getId() {
+        return _id;
+    }
+
+    @Override
+    public int hashCode() {
+        return getId().hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return getId();
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/SimpleDomain.java b/forester/java/src/org/forester/surfacing/SimpleDomain.java
new file mode 100644 (file)
index 0000000..76d73bf
--- /dev/null
@@ -0,0 +1,122 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.go.GoId;
+import org.forester.util.ForesterUtil;
+
+/*
+ * A limited implementation of Domain. Its intended use is for when only a
+ * domain identifier is needed. Not intended for general use.
+ */
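+// Usage sketch (illustrative only; the identifier "PF00069" is just an example):
+//
+//   final SimpleDomain d = new SimpleDomain( "PF00069" );
+//   d.getDomainId();             // supported
+//   d.compareTo( other_domain ); // supported, compares by domain id
+//   d.getFrom();                 // throws RuntimeException -- not implemented here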
+public class SimpleDomain implements Domain {
+
+    final private DomainId _id;
+
+    public SimpleDomain( final String id_str ) {
+        if ( ForesterUtil.isEmpty( id_str ) ) {
+            throw new IllegalArgumentException( "attempt to create protein domain with null or empty id" );
+        }
+        _id = new DomainId( id_str );
+    }
+
+    @Override
+    public void addGoId( final GoId go_id ) {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public int compareTo( final Domain domain ) {
+        if ( this == domain ) {
+            return 0;
+        }
+        return getDomainId().compareTo( domain.getDomainId() );
+    }
+
+    public DomainId getDomainId() {
+        return _id;
+    }
+
+    public int getFrom() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    @Override
+    public GoId getGoId( final int i ) {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public int getLength() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public short getNumber() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    @Override
+    public int getNumberOfGoIds() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    @Override
+    public double getPerDomainEvalue() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    @Override
+    public double getPerDomainScore() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public double getPerSequenceEvalue() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public double getPerSequenceScore() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public String getSearchParameter() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public int getTo() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public short getTotalCount() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public boolean isCompleteQueryMatch() {
+        throw new RuntimeException( "method not implemented" );
+    }
+
+    public boolean isCompleteTargetMatch() {
+        throw new RuntimeException( "method not implemented" );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/Species.java b/forester/java/src/org/forester/surfacing/Species.java
new file mode 100644 (file)
index 0000000..fb387f6
--- /dev/null
@@ -0,0 +1,32 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+public interface Species extends Comparable<Species> {
+
+    public abstract String getSpeciesId();
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java b/forester/java/src/org/forester/surfacing/SpeciesSpecificDomainSimilariyData.java
new file mode 100644 (file)
index 0000000..8c5908d
--- /dev/null
@@ -0,0 +1,50 @@
+// $Id:
+// cmzmasek Exp $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.util.SortedMap;
+
+/*
+ * A helper interface for PrintableDomainSimilarity.
+ */
+interface SpeciesSpecificDomainSimilariyData {
+
+    public void addProteinsExhibitingCombinationCount( final DomainId domain_id, final int count );
+
+    /**
+     * This should return a sorted map mapping domain ids to their corresponding
+     * counts
+     * 
+     * @return a sorted map mapping domain ids to their corresponding counts
+     */
+    public SortedMap<DomainId, Integer> getCombinableDomainIdToCountsMap();
+
+    public int getNumberOfProteinsExhibitingCombinationWith( final DomainId domain_id );
+
+    public StringBuffer toStringBuffer( final DomainSimilarityCalculator.Detailedness detailedness, final boolean html );
+}
diff --git a/forester/java/src/org/forester/surfacing/SurfacingConstants.java b/forester/java/src/org/forester/surfacing/SurfacingConstants.java
new file mode 100644 (file)
index 0000000..6bfd208
--- /dev/null
@@ -0,0 +1,48 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import org.forester.util.ForesterUtil;
+
+public class SurfacingConstants {
+
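+    // Most constants here are URL prefixes; a query term or identifier is appended
+    // to form a complete link, e.g. (illustrative): PFAM_FAMILY_ID_LINK + "Kinase"
+    // or GO_LINK + "GO:0003824".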
+    public static final String GOOGLE_WEB_SEARCH_LINK       = "http://www.google.com/search?q=";
+    public static final String GOOGLE_SCHOLAR_LINK          = "http://scholar.google.com/scholar?q=";
+    public static final String GOOGLE_SCHOLAR_LIMITS        = "&as_subj=bio&as_subj=med&as_subj=chm&num=100";
+    public static final String AMIGO_LINK                   = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
+    public static final String PFAM_FAMILY_ID_LINK          = "http://pfam.sanger.ac.uk/family?id=";
+    public static final String NL                           = ForesterUtil.LINE_SEPARATOR;
+    public static final String TAXONOMY_LINK                = "http://beta.uniprot.org/taxonomy/?query=";
+    static final boolean       SECONDARY_FEATURES_ARE_SCOP  = true;
+    static final String        SECONDARY_FEATURES_SCOP_LINK = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
+    public static final String NONE                         = "[none]";
+    public static final String UNIPROT_LINK                 = "http://beta.uniprot.org/taxonomy/?query=";
+    public static final String GO_LINK                      = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
+    public static final String EOL_LINK                     = "http://www.eol.org/search?q=";
+    public static final String TOL_LINK                     = "http://www.googlesyndicatedsearch.com/u/TreeofLife?q=";
+    public static final String WIKIPEDIA_LINK               = "http://wikipedia.org/wiki/";
+}
diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java
new file mode 100644 (file)
index 0000000..119d014
--- /dev/null
@@ -0,0 +1,2414 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.application.surfacing_old;
+import org.forester.evoinference.distance.NeighborJoining;
+import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
+import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.go.GoId;
+import org.forester.go.GoNameSpace;
+import org.forester.go.GoTerm;
+import org.forester.go.GoUtils;
+import org.forester.go.PfamToGoMapping;
+import org.forester.io.parsers.nexus.NexusConstants;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.BinaryCharacters;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
+import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
+import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
+import org.forester.util.AsciiHistogram;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.BasicTable;
+import org.forester.util.BasicTableParser;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
+
+public final class SurfacingUtil {
+
+    private final static NumberFormat       FORMATTER                        = new DecimalFormat( "0.0E0" );
+    private final static NumberFormat       FORMATTER_3                      = new DecimalFormat( "0.000" );
+    private static final Comparator<Domain> ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
+
+                                                                                 public int compare( final Domain d1,
+                                                                                                     final Domain d2 ) {
+                                                                                     if ( d1.getPerSequenceEvalue() < d2
+                                                                                             .getPerSequenceEvalue() ) {
+                                                                                         return -1;
+                                                                                     }
+                                                                                     else if ( d1
+                                                                                             .getPerSequenceEvalue() > d2
+                                                                                             .getPerSequenceEvalue() ) {
+                                                                                         return 1;
+                                                                                     }
+                                                                                     else {
+                                                                                         return d1.compareTo( d2 );
+                                                                                     }
+                                                                                 }
+                                                                             };
+    public final static Pattern             PATTERN_SP_STYLE_TAXONOMY        = Pattern.compile( "^[A-Z0-9]{3,5}$" );
+
+    private SurfacingUtil() {
+        // Hidden constructor.
+    }
+
+    public static void addAllBinaryDomainCombinationToSet( final GenomeWideCombinableDomains genome,
+                                                           final SortedSet<BinaryDomainCombination> binary_domain_combinations ) {
+        final SortedMap<DomainId, CombinableDomains> all_cd = genome.getAllCombinableDomainsIds();
+        for( final DomainId domain_id : all_cd.keySet() ) {
+            binary_domain_combinations.addAll( all_cd.get( domain_id ).toBinaryDomainCombinations() );
+        }
+    }
+
+    public static void addAllDomainIdsToSet( final GenomeWideCombinableDomains genome,
+                                             final SortedSet<DomainId> domain_ids ) {
+        final SortedSet<DomainId> domains = genome.getAllDomainIds();
+        for( final DomainId domain : domains ) {
+            domain_ids.add( domain );
+        }
+    }
+
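+    // Writes a minimal HTML <head> element: a title plus inline CSS for links,
+    // table cells, and headings.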
+    public static void addHtmlHead( final Writer w, final String title ) throws IOException {
+        w.write( SurfacingConstants.NL );
+        w.write( "<head>" );
+        w.write( "<title>" );
+        w.write( title );
+        w.write( "</title>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "<style>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a:visited { color : #6633FF; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a:link { color : #6633FF; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a:active { color : #99FF00; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "a:hover { color : #FFFFFF; background-color : #99FF00; text-decoration : none; }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "td { text-align: left; vertical-align: top; font-family: Verdana, Arial, Helvetica; font-size: 8pt}" );
+        w.write( SurfacingConstants.NL );
+        w.write( "h1 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 18pt; font-weight: bold }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "h2 { color : #0000FF; font-family: Verdana, Arial, Helvetica; font-size: 16pt; font-weight: bold }" );
+        w.write( SurfacingConstants.NL );
+        w.write( "</style>" );
+        w.write( SurfacingConstants.NL );
+        w.write( "</head>" );
+        w.write( SurfacingConstants.NL );
+    }
+
+    public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues( final Set<DomainSimilarity> similarities ) {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        for( final DomainSimilarity similarity : similarities ) {
+            stats.addValue( similarity.getMeanSimilarityScore() );
+        }
+        return stats;
+    }
+
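+    // Counts how many positions in [domain.getFrom(), domain.getTo()] are already
+    // marked as covered; indices beyond covered_positions are treated as uncovered.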
+    public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
+        int overlap_count = 0;
+        for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+            if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) {
+                ++overlap_count;
+            }
+        }
+        return overlap_count;
+    }
+
+    public static void checkForOutputFileWriteability( final File outfile ) {
+        final String error = ForesterUtil.isWritableFile( outfile );
+        if ( !ForesterUtil.isEmpty( error ) ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, error );
+        }
+    }
+
+    private static SortedSet<String> collectAllDomainsChangedOnSubtree( final PhylogenyNode subtree_root,
+                                                                        final boolean get_gains ) {
+        final SortedSet<String> domains = new TreeSet<String>();
+        for( final PhylogenyNode descendant : PhylogenyMethods.getAllDescendants( subtree_root ) ) {
+            final BinaryCharacters chars = descendant.getNodeData().getBinaryCharacters();
+            if ( get_gains ) {
+                domains.addAll( chars.getGainedCharacters() );
+            }
+            else {
+                domains.addAll( chars.getLostCharacters() );
+            }
+        }
+        return domains;
+    }
+
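+    // Collects from a gain/loss matrix every character gained (get_gains == true) or
+    // lost (get_gains == false), instantiating binary domain combinations of the
+    // requested type (directed adjacent, directed, or basic). Despite the method
+    // name, results are added to the given list rather than written to a file.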
+    public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
+                                                                                           final BinaryDomainCombination.DomainCombinationType dc_type,
+                                                                                           final List<BinaryDomainCombination> all_binary_domains_combination_gained,
+                                                                                           final boolean get_gains ) {
+        final SortedSet<String> sorted_ids = new TreeSet<String>();
+        for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
+            sorted_ids.add( matrix.getIdentifier( i ) );
+        }
+        for( final String id : sorted_ids ) {
+            for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
+                if ( ( get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) )
+                        || ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
+                    if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
+                        all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
+                                .createInstance( matrix.getCharacter( c ) ) );
+                    }
+                    else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
+                        all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
+                                .createInstance( matrix.getCharacter( c ) ) );
+                    }
+                    else {
+                        all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
+                                .getCharacter( c ) ) );
+                    }
+                }
+            }
+        }
+    }
+
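+    // Creates (if necessary) and returns the nested output directory
+    // <parent of outfile>/<base_dir>/{DC|DOMAINS}/{GAINS|LOSSES|PRESENT},
+    // chosen according to domain_combinations and the gain/loss state.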
+    private static File createBaseDirForPerNodeDomainFiles( final String base_dir,
+                                                            final boolean domain_combinations,
+                                                            final CharacterStateMatrix.GainLossStates state,
+                                                            final String outfile ) {
+        File per_node_go_mapped_domain_gain_loss_files_base_dir = new File( new File( outfile ).getParent()
+                + ForesterUtil.FILE_SEPARATOR + base_dir );
+        if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
+            per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
+        }
+        if ( domain_combinations ) {
+            per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                    + ForesterUtil.FILE_SEPARATOR + "DC" );
+        }
+        else {
+            per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                    + ForesterUtil.FILE_SEPARATOR + "DOMAINS" );
+        }
+        if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
+            per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
+        }
+        if ( state == GainLossStates.GAIN ) {
+            per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                    + ForesterUtil.FILE_SEPARATOR + "GAINS" );
+        }
+        else if ( state == GainLossStates.LOSS ) {
+            per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                    + ForesterUtil.FILE_SEPARATOR + "LOSSES" );
+        }
+        else {
+            per_node_go_mapped_domain_gain_loss_files_base_dir = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                    + ForesterUtil.FILE_SEPARATOR + "PRESENT" );
+        }
+        if ( !per_node_go_mapped_domain_gain_loss_files_base_dir.exists() ) {
+            per_node_go_mapped_domain_gain_loss_files_base_dir.mkdir();
+        }
+        return per_node_go_mapped_domain_gain_loss_files_base_dir;
+    }
+
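+    // Builds a one-to-many map from Pfam domain ids to GO ids; a domain id occurring
+    // in several PfamToGoMapping entries accumulates all of its GO ids in one list.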
+    public static Map<DomainId, List<GoId>> createDomainIdToGoIdMap( final List<PfamToGoMapping> pfam_to_go_mappings ) {
+        final Map<DomainId, List<GoId>> domain_id_to_go_ids_map = new HashMap<DomainId, List<GoId>>( pfam_to_go_mappings
+                .size() );
+        for( final PfamToGoMapping pfam_to_go : pfam_to_go_mappings ) {
+            if ( !domain_id_to_go_ids_map.containsKey( pfam_to_go.getKey() ) ) {
+                domain_id_to_go_ids_map.put( pfam_to_go.getKey(), new ArrayList<GoId>() );
+            }
+            domain_id_to_go_ids_map.get( pfam_to_go.getKey() ).add( pfam_to_go.getValue() );
+        }
+        return domain_id_to_go_ids_map;
+    }
+
+    public static Map<DomainId, Set<String>> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file )
+            throws IOException {
+        final BasicTable<String> primary_table = BasicTableParser.parse( secondary_features_map_file, "\t" );
+        final Map<DomainId, Set<String>> map = new TreeMap<DomainId, Set<String>>();
+        for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) {
+            final DomainId domain_id = new DomainId( primary_table.getValue( 0, r ) );
+            if ( !map.containsKey( domain_id ) ) {
+                map.put( domain_id, new HashSet<String>() );
+            }
+            map.get( domain_id ).add( primary_table.getValue( 1, r ) );
+        }
+        return map;
+    }
+
+    public static Phylogeny createNjTreeBasedOnMatrixToFile( final File nj_tree_outfile, final DistanceMatrix distance ) {
+        checkForOutputFileWriteability( nj_tree_outfile );
+        final NeighborJoining nj = NeighborJoining.createInstance();
+        final Phylogeny phylogeny = nj.execute( distance );
+        phylogeny.setName( nj_tree_outfile.getName() );
+        writePhylogenyToFile( phylogeny, nj_tree_outfile.toString() );
+        return phylogeny;
+    }
+
+    private static SortedSet<BinaryDomainCombination> createSetOfAllBinaryDomainCombinationsPerGenome( final GenomeWideCombinableDomains gwcd ) {
+        final SortedMap<DomainId, CombinableDomains> cds = gwcd.getAllCombinableDomainsIds();
+        final SortedSet<BinaryDomainCombination> binary_combinations = new TreeSet<BinaryDomainCombination>();
+        for( final DomainId domain_id : cds.keySet() ) {
+            final CombinableDomains cd = cds.get( domain_id );
+            binary_combinations.addAll( cd.toBinaryDomainCombinations() );
+        }
+        return binary_combinations;
+    }
+
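+    // Pushes output settings (detailedness, GO annotation output mode, GO id-to-term
+    // map, optional GO namespace limit) into every PrintableDomainSimilarity; other
+    // DomainSimilarity implementations are left untouched.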
+    public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
+                                                            final Detailedness detailedness,
+                                                            final GoAnnotationOutput go_annotation_output,
+                                                            final Map<GoId, GoTerm> go_id_to_term_map,
+                                                            final GoNameSpace go_namespace_limit ) {
+        if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || go_id_to_term_map.isEmpty() ) ) {
+            throw new IllegalArgumentException( "attempt to use a GO namespace limit without a GO id to term map" );
+        }
+        for( final DomainSimilarity domain_similarity : domain_similarities ) {
+            if ( domain_similarity instanceof PrintableDomainSimilarity ) {
+                final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
+                printable_domain_similarity.setDetailedness( detailedness );
+                printable_domain_similarity.setGoAnnotationOutput( go_annotation_output );
+                printable_domain_similarity.setGoIdToTermMap( go_id_to_term_map );
+                printable_domain_similarity.setGoNamespaceLimit( go_namespace_limit );
+            }
+        }
+    }
+
+    public static void executeDomainLengthAnalysis( final String[][] input_file_properties,
+                                                    final int number_of_genomes,
+                                                    final DomainLengthsTable domain_lengths_table,
+                                                    final File outfile ) throws IOException {
+        final DecimalFormat df = new DecimalFormat( "#.00" );
+        checkForOutputFileWriteability( outfile );
+        final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
+        out.write( "MEAN BASED STATISTICS PER SPECIES" );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        out.write( domain_lengths_table.createMeanBasedStatisticsPerSpeciesTable().toString() );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        final List<DomainLengths> domain_lengths_list = domain_lengths_table.getDomainLengthsList();
+        out.write( "OUTLIER SPECIES PER DOMAIN (Z>=1.5)" );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        for( final DomainLengths domain_lengths : domain_lengths_list ) {
+            final List<Species> species_list = domain_lengths.getMeanBasedOutlierSpecies( 1.5 );
+            if ( species_list.size() > 0 ) {
+                out.write( domain_lengths.getDomainId() + "\t" );
+                for( final Species species : species_list ) {
+                    out.write( species + "\t" );
+                }
+                out.write( ForesterUtil.LINE_SEPARATOR );
+                // DescriptiveStatistics stats_for_domain = domain_lengths
+                //         .calculateMeanBasedStatistics();
+                //AsciiHistogram histo = new AsciiHistogram( stats_for_domain );
+                //System.out.println( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
+            }
+        }
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        out.write( "OUTLIER SPECIES (Z 1.0)" );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        final DescriptiveStatistics stats_for_all_species = domain_lengths_table
+                .calculateMeanBasedStatisticsForAllSpecies();
+        out.write( stats_for_all_species.asSummary() );
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        final AsciiHistogram histo = new AsciiHistogram( stats_for_all_species );
+        out.write( histo.toStringBuffer( 40, '=', 60, 4 ).toString() );
+        out.write( ForesterUtil.LINE_SEPARATOR );
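+        // Z-score of each species' mean domain length relative to the across-species
+        // distribution (note: despite the variable names, the sample standard
+        // deviation is used as the estimate).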
+        final double population_sd = stats_for_all_species.sampleStandardDeviation();
+        final double population_mean = stats_for_all_species.arithmeticMean();
+        for( final Species species : domain_lengths_table.getSpecies() ) {
+            final double x = domain_lengths_table.calculateMeanBasedStatisticsForSpecies( species ).arithmeticMean();
+            final double z = ( x - population_mean ) / population_sd;
+            out.write( species + "\t" + z );
+            out.write( ForesterUtil.LINE_SEPARATOR );
+        }
+        out.write( ForesterUtil.LINE_SEPARATOR );
+        for( final Species species : domain_lengths_table.getSpecies() ) {
+            final DescriptiveStatistics stats_for_species = domain_lengths_table
+                    .calculateMeanBasedStatisticsForSpecies( species );
+            final double x = stats_for_species.arithmeticMean();
+            final double z = ( x - population_mean ) / population_sd;
+            if ( ( z <= -1.0 ) || ( z >= 1.0 ) ) {
+                out.write( species + "\t" + df.format( z ) + "\t" + stats_for_species.asSummary() );
+                out.write( ForesterUtil.LINE_SEPARATOR );
+            }
+        }
+        out.close();
+        //        final List<HistogramData> histogram_datas = new ArrayList<HistogramData>();
+        //        for( int i = 0; i < number_of_genomes; ++i ) {
+        //            final Species species = new BasicSpecies( input_file_properties[ i ][ 0 ] );
+        //            histogram_datas
+        //                    .add( new HistogramData( species.toString(), domain_lengths_table
+        //                            .calculateMeanBasedStatisticsForSpecies( species )
+        //                            .getDataAsDoubleArray(), 5, 600, null, 60 ) );
+        //        }
+        //        final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
+        //        hf.setVisible( true );
+        System.gc();
+    }
+
+    /**
+     * Executes Dollo parsimony on domain presence/absence and, if binary domain
+     * combinations exist, Fitch parsimony on their presence/absence; writes the
+     * resulting matrices, gain/loss lists, HTML reports, and annotated trees.
+     * 
+     * @param all_binary_domains_combination_gained_fitch if null ignored, otherwise used to collect all binary
+     * domain combinations gained under unweighted (Fitch) parsimony
+     * @param all_binary_domains_combination_lost_fitch if null ignored, otherwise used to collect all binary
+     * domain combinations lost under unweighted (Fitch) parsimony
+     */
+    public static void executeParsimonyAnalysis( final long random_number_seed_for_fitch_parsimony,
+                                                 final boolean radomize_fitch_parsimony,
+                                                 final String outfile_name,
+                                                 final DomainParsimonyCalculator domain_parsimony,
+                                                 final Phylogeny phylogeny,
+                                                 final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                 final Map<GoId, GoTerm> go_id_to_term_map,
+                                                 final GoNameSpace go_namespace_limit,
+                                                 final String parameters_str,
+                                                 final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
+                                                 final SortedSet<DomainId> positive_filter,
+                                                 final boolean output_binary_domain_combinations_for_graphs,
+                                                 final List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch,
+                                                 final List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch,
+                                                 final BinaryDomainCombination.DomainCombinationType dc_type ) {
+        final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
+        final String date_time = ForesterUtil.getCurrentDateTime();
+        final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
+        final SortedSet<String> all_pfams_gained_as_domains = new TreeSet<String>();
+        final SortedSet<String> all_pfams_lost_as_domains = new TreeSet<String>();
+        final SortedSet<String> all_pfams_gained_as_dom_combinations = new TreeSet<String>();
+        final SortedSet<String> all_pfams_lost_as_dom_combinations = new TreeSet<String>();
+        writeToNexus( outfile_name, domain_parsimony, phylogeny );
+        // DOLLO DOMAINS
+        // -------------
+        Phylogeny local_phylogeny_l = phylogeny.copy();
+        if ( ( positive_filter != null ) && ( positive_filter.size() > 0 ) ) {
+            domain_parsimony.executeDolloParsimonyOnDomainPresence( positive_filter );
+        }
+        else {
+            domain_parsimony.executeDolloParsimonyOnDomainPresence();
+        }
+        SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
+                + surfacing_old.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
+        SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
+                + surfacing_old.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_DOMAINS, Format.FORESTER );
+        SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
+                                                           CharacterStateMatrix.GainLossStates.GAIN,
+                                                           outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_GAINS_D,
+                                                           sep,
+                                                           ForesterUtil.LINE_SEPARATOR,
+                                                           null );
+        SurfacingUtil
+                .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
+                                                      CharacterStateMatrix.GainLossStates.LOSS,
+                                                      outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_LOSSES_D,
+                                                      sep,
+                                                      ForesterUtil.LINE_SEPARATOR,
+                                                      null );
+        SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
+                + surfacing_old.PARSIMONY_OUTPUT_DOLLO_PRESENT_D, sep, ForesterUtil.LINE_SEPARATOR, null );
+        //HTML:
+        writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
+                                       go_id_to_term_map,
+                                       go_namespace_limit,
+                                       false,
+                                       domain_parsimony.getGainLossMatrix(),
+                                       CharacterStateMatrix.GainLossStates.GAIN,
+                                       outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D,
+                                       sep,
+                                       ForesterUtil.LINE_SEPARATOR,
+                                       "Dollo Parsimony | Gains | Domains",
+                                       "+",
+                                       domain_id_to_secondary_features_maps,
+                                       all_pfams_encountered,
+                                       all_pfams_gained_as_domains,
+                                       "_dollo_gains_d" );
+        writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
+                                       go_id_to_term_map,
+                                       go_namespace_limit,
+                                       false,
+                                       domain_parsimony.getGainLossMatrix(),
+                                       CharacterStateMatrix.GainLossStates.LOSS,
+                                       outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D,
+                                       sep,
+                                       ForesterUtil.LINE_SEPARATOR,
+                                       "Dollo Parsimony | Losses | Domains",
+                                       "-",
+                                       domain_id_to_secondary_features_maps,
+                                       all_pfams_encountered,
+                                       all_pfams_lost_as_domains,
+                                       "_dollo_losses_d" );
+        writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
+                                       go_id_to_term_map,
+                                       go_namespace_limit,
+                                       false,
+                                       domain_parsimony.getGainLossMatrix(),
+                                       null,
+                                       outfile_name + surfacing_old.PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D,
+                                       sep,
+                                       ForesterUtil.LINE_SEPARATOR,
+                                       "Dollo Parsimony | Present | Domains",
+                                       "",
+                                       domain_id_to_secondary_features_maps,
+                                       all_pfams_encountered,
+                                       null,
+                                       "_dollo_present_d" );
+        preparePhylogeny( local_phylogeny_l,
+                          domain_parsimony,
+                          date_time,
+                          "Dollo parsimony on domain presence/absence",
+                          "dollo_on_domains_" + outfile_name,
+                          parameters_str );
+        SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
+                + surfacing_old.DOMAINS_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
+        try {
+            writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, true, outfile_name, "_dollo_all_gains_d" );
+            writeAllDomainsChangedOnAllSubtrees( local_phylogeny_l, false, outfile_name, "_dollo_all_losses_d" );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getLocalizedMessage() );
+        }
+        if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
+            // FITCH DOMAIN COMBINATIONS
+            // -------------------------
+            local_phylogeny_l = phylogeny.copy();
+            String randomization = "no";
+            if ( radomize_fitch_parsimony ) {
+                domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( random_number_seed_for_fitch_parsimony );
+                randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony;
+            }
+            else {
+                domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( false );
+            }
+            SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
+                    + surfacing_old.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
+            SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(), outfile_name
+                    + surfacing_old.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
+            SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
+                                                               CharacterStateMatrix.GainLossStates.GAIN,
+                                                               outfile_name
+                                                                       + surfacing_old.PARSIMONY_OUTPUT_FITCH_GAINS_BC,
+                                                               sep,
+                                                               ForesterUtil.LINE_SEPARATOR,
+                                                               null );
+            SurfacingUtil
+                    .writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
+                                                          CharacterStateMatrix.GainLossStates.LOSS,
+                                                          outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_LOSSES_BC,
+                                                          sep,
+                                                          ForesterUtil.LINE_SEPARATOR,
+                                                          null );
+            SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
+                    + surfacing_old.PARSIMONY_OUTPUT_FITCH_PRESENT_BC, sep, ForesterUtil.LINE_SEPARATOR, null );
+            if ( all_binary_domains_combination_gained_fitch != null ) {
+                collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony
+                        .getGainLossMatrix(), dc_type, all_binary_domains_combination_gained_fitch, true );
+            }
+            if ( all_binary_domains_combination_lost_fitch != null ) {
+                collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile( domain_parsimony
+                        .getGainLossMatrix(), dc_type, all_binary_domains_combination_lost_fitch, false );
+            }
+            if ( output_binary_domain_combinations_for_graphs ) {
+                SurfacingUtil
+                        .writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( domain_parsimony
+                                                                                                           .getGainLossMatrix(),
+                                                                                                   null,
+                                                                                                   outfile_name
+                                                                                                           + surfacing_old.PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS,
+                                                                                                   sep,
+                                                                                                   ForesterUtil.LINE_SEPARATOR,
+                                                                                                   BinaryDomainCombination.OutputFormat.DOT );
+            }
+            // HTML:
+            writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
+                                           go_id_to_term_map,
+                                           go_namespace_limit,
+                                           true,
+                                           domain_parsimony.getGainLossMatrix(),
+                                           CharacterStateMatrix.GainLossStates.GAIN,
+                                           outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_GAINS_HTML_BC,
+                                           sep,
+                                           ForesterUtil.LINE_SEPARATOR,
+                                           "Fitch Parsimony | Gains | Domain Combinations",
+                                           "+",
+                                           null,
+                                           all_pfams_encountered,
+                                           all_pfams_gained_as_dom_combinations,
+                                           "_fitch_gains_dc" );
+            writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
+                                           go_id_to_term_map,
+                                           go_namespace_limit,
+                                           true,
+                                           domain_parsimony.getGainLossMatrix(),
+                                           CharacterStateMatrix.GainLossStates.LOSS,
+                                           outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_LOSSES_HTML_BC,
+                                           sep,
+                                           ForesterUtil.LINE_SEPARATOR,
+                                           "Fitch Parsimony | Losses | Domain Combinations",
+                                           "-",
+                                           null,
+                                           all_pfams_encountered,
+                                           all_pfams_lost_as_dom_combinations,
+                                           "_fitch_losses_dc" );
+            writeBinaryStatesMatrixToList( domain_id_to_go_ids_map,
+                                           go_id_to_term_map,
+                                           go_namespace_limit,
+                                           true,
+                                           domain_parsimony.getGainLossMatrix(),
+                                           null,
+                                           outfile_name + surfacing_old.PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC,
+                                           sep,
+                                           ForesterUtil.LINE_SEPARATOR,
+                                           "Fitch Parsimony | Present | Domain Combinations",
+                                           "",
+                                           null,
+                                           all_pfams_encountered,
+                                           null,
+                                           "_fitch_present_dc" );
+            writeAllEncounteredPfamsToFile( domain_id_to_go_ids_map,
+                                            go_id_to_term_map,
+                                            outfile_name,
+                                            all_pfams_encountered );
+            writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_GAINED_AS_DOMAINS_SUFFIX,
+                              all_pfams_gained_as_domains );
+            writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_LOST_AS_DOMAINS_SUFFIX, all_pfams_lost_as_domains );
+            writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_GAINED_AS_DC_SUFFIX,
+                              all_pfams_gained_as_dom_combinations );
+            writePfamsToFile( outfile_name + surfacing_old.ALL_PFAMS_LOST_AS_DC_SUFFIX,
+                              all_pfams_lost_as_dom_combinations );
+            preparePhylogeny( local_phylogeny_l,
+                              domain_parsimony,
+                              date_time,
+                              "Fitch parsimony on binary domain combination presence/absence randomization: "
+                                      + randomization,
+                              "fitch_on_binary_domain_combinations_" + outfile_name,
+                              parameters_str );
+            SurfacingUtil.writePhylogenyToFile( local_phylogeny_l, outfile_name
+                    + surfacing_old.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH );
+        }
+    }
+
+    public static void executeParsimonyAnalysisForSecondaryFeatures( final String outfile_name,
+                                                                     final DomainParsimonyCalculator secondary_features_parsimony,
+                                                                     final Phylogeny phylogeny,
+                                                                     final String parameters_str,
+                                                                     final Map<Species, MappingResults> mapping_results_map ) {
+        final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
+        final String date_time = ForesterUtil.getCurrentDateTime();
+        System.out.println();
+        writeToNexus( outfile_name + surfacing_old.NEXUS_SECONDARY_FEATURES, secondary_features_parsimony
+                .createMatrixOfSecondaryFeaturePresenceOrAbsence( null ), phylogeny );
+        final Phylogeny local_phylogeny_copy = phylogeny.copy();
+        secondary_features_parsimony.executeDolloParsimonyOnSecondaryFeatures( mapping_results_map );
+        SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossMatrix(), outfile_name
+                + surfacing_old.PARSIMONY_OUTPUT_GL_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
+        SurfacingUtil.writeMatrixToFile( secondary_features_parsimony.getGainLossCountsMatrix(), outfile_name
+                + surfacing_old.PARSIMONY_OUTPUT_GL_COUNTS_SUFFIX_DOLLO_SECONDARY_FEATURES, Format.FORESTER );
+        SurfacingUtil
+                .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
+                                                      CharacterStateMatrix.GainLossStates.GAIN,
+                                                      outfile_name
+                                                              + surfacing_old.PARSIMONY_OUTPUT_DOLLO_GAINS_SECONDARY_FEATURES,
+                                                      sep,
+                                                      ForesterUtil.LINE_SEPARATOR,
+                                                      null );
+        SurfacingUtil
+                .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
+                                                      CharacterStateMatrix.GainLossStates.LOSS,
+                                                      outfile_name
+                                                              + surfacing_old.PARSIMONY_OUTPUT_DOLLO_LOSSES_SECONDARY_FEATURES,
+                                                      sep,
+                                                      ForesterUtil.LINE_SEPARATOR,
+                                                      null );
+        SurfacingUtil
+                .writeBinaryStatesMatrixAsListToFile( secondary_features_parsimony.getGainLossMatrix(),
+                                                      null,
+                                                      outfile_name
+                                                              + surfacing_old.PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES,
+                                                      sep,
+                                                      ForesterUtil.LINE_SEPARATOR,
+                                                      null );
+        preparePhylogeny( local_phylogeny_copy,
+                          secondary_features_parsimony,
+                          date_time,
+                          "Dollo parsimony on secondary feature presence/absence",
+                          "dollo_on_secondary_features_" + outfile_name,
+                          parameters_str );
+        SurfacingUtil.writePhylogenyToFile( local_phylogeny_copy, outfile_name
+                + surfacing_old.SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO );
+    }
+
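+    // For every protein containing the query domains, writes one line: species id,
+    // protein id, the protein's distinct domain ids with their total counts in
+    // braces, then description and accession (when present and not "[none]").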
+    public static void extractProteinNames( final List<Protein> proteins,
+                                            final List<DomainId> query_domain_ids_nc_order,
+                                            final Writer out,
+                                            final String separator ) throws IOException {
+        for( final Protein protein : proteins ) {
+            if ( protein.contains( query_domain_ids_nc_order, true ) ) {
+                out.write( protein.getSpecies().getSpeciesId() );
+                out.write( separator );
+                out.write( protein.getProteinId().getId() );
+                out.write( separator );
+                out.write( "[" );
+                final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
+                boolean first = true;
+                for( final Domain domain : protein.getProteinDomains() ) {
+                    if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
+                        visited_domain_ids.add( domain.getDomainId() );
+                        if ( first ) {
+                            first = false;
+                        }
+                        else {
+                            out.write( " " );
+                        }
+                        out.write( domain.getDomainId().getId() );
+                        out.write( " {" );
+                        out.write( "" + domain.getTotalCount() );
+                        out.write( "}" );
+                    }
+                }
+                out.write( "]" );
+                out.write( separator );
+                if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
+                        .equals( SurfacingConstants.NONE ) ) ) {
+                    out.write( protein.getDescription() );
+                }
+                out.write( separator );
+                if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
+                        .equals( SurfacingConstants.NONE ) ) ) {
+                    out.write( protein.getAccession() );
+                }
+                out.write( SurfacingConstants.NL );
+            }
+        }
+        out.flush();
+    }
+
+    public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                            final DomainId domain_id,
+                                            final Writer out,
+                                            final String separator ) throws IOException {
+        for( final Species species : protein_lists_per_species.keySet() ) {
+            for( final Protein protein : protein_lists_per_species.get( species ) ) {
+                final List<Domain> domains = protein.getProteinDomains( domain_id );
+                if ( domains.size() > 0 ) {
+                    final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+                    for( final Domain domain : domains ) {
+                        stats.addValue( domain.getPerSequenceEvalue() );
+                    }
+                    out.write( protein.getSpecies().getSpeciesId() );
+                    out.write( separator );
+                    out.write( protein.getProteinId().getId() );
+                    out.write( separator );
+                    out.write( "[" + FORMATTER.format( stats.median() ) + "]" );
+                    out.write( separator );
+                    if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
+                            .equals( SurfacingConstants.NONE ) ) ) {
+                        out.write( protein.getDescription() );
+                    }
+                    out.write( separator );
+                    if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
+                            .equals( SurfacingConstants.NONE ) ) ) {
+                        out.write( protein.getAccession() );
+                    }
+                    out.write( SurfacingConstants.NL );
+                }
+            }
+        }
+        out.flush();
+    }
+
+    public static SortedSet<DomainId> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
+        final SortedSet<DomainId> all_domains_ids = new TreeSet<DomainId>();
+        for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
+            final Set<DomainId> all_domains = gwcd.getAllDomainIds();
+            all_domains_ids.addAll( all_domains );
+        }
+        return all_domains_ids;
+    }
+
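+    /**
+     * Counts how often each domain id occurs across all protein domains of the given
+     * proteins (repeated occurrences within one protein are counted individually).
+     * 
+     * @return a sorted map from domain id string to occurrence count
+     */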
+    public static SortedMap<String, Integer> getDomainCounts( final List<Protein> protein_domain_collections ) {
+        final SortedMap<String, Integer> map = new TreeMap<String, Integer>();
+        for( final Protein protein_domain_collection : protein_domain_collections ) {
+            for( final Object name : protein_domain_collection.getProteinDomains() ) {
+                final BasicDomain protein_domain = ( BasicDomain ) name;
+                final String id = protein_domain.getDomainId().getId();
+                if ( map.containsKey( id ) ) {
+                    map.put( id, map.get( id ) + 1 );
+                }
+                else {
+                    map.put( id, 1 );
+                }
+            }
+        }
+        return map;
+    }
+
+    public static int getNumberOfNodesLackingName( final Phylogeny p, final StringBuilder names ) {
+        final PhylogenyNodeIterator it = p.iteratorPostorder();
+        int c = 0;
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            if ( ForesterUtil.isEmpty( n.getName() )
+                    && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
+                            .getScientificName() ) ) ) {
+                if ( n.getParent() != null ) {
+                    names.append( " " );
+                    names.append( n.getParent().getName() );
+                }
+                ++c;
+            }
+        }
+        return c;
+    }
+
+    /**
+     * Returns true if Domain domain falls within an uninterrupted stretch of
+     * covered positions.
+     * 
+     * @param domain the domain to check
+     * @param covered_positions per-position coverage flags (true means covered)
+     * @return true if every position from the domain's start to its end is covered
+     */
+    public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
+        for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+            if ( ( i >= covered_positions.size() ) || !covered_positions.get( i ) ) {
+                return false;
+            }
+        }
+        return true;
+    }
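+    // Example for isEngulfed (illustrative): with covered_positions = [true, true, true,
+    // true, false], a domain spanning positions 1-3 is engulfed, while a domain spanning
+    // positions 2-4 is not, since position 4 is uncovered.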
+
+    public static void preparePhylogeny( final Phylogeny p,
+                                         final DomainParsimonyCalculator domain_parsimony,
+                                         final String date_time,
+                                         final String method,
+                                         final String name,
+                                         final String parameters_str ) {
+        domain_parsimony.decoratePhylogenyWithDomains( p );
+        final StringBuilder desc = new StringBuilder();
+        desc.append( "[Method: " + method + "] [Date: " + date_time + "] " );
+        desc.append( "[Cost: " + domain_parsimony.getCost() + "] " );
+        desc.append( "[Gains: " + domain_parsimony.getTotalGains() + "] " );
+        desc.append( "[Losses: " + domain_parsimony.getTotalLosses() + "] " );
+        desc.append( "[Unchanged: " + domain_parsimony.getTotalUnchanged() + "] " );
+        desc.append( "[Parameters: " + parameters_str + "]" );
+        p.setName( name );
+        p.setDescription( desc.toString() );
+        p.setConfidence( new Confidence( domain_parsimony.getCost(), "parsimony" ) );
+        p.setRerootable( false );
+        p.setRooted( true );
+    }
+
+    /**
+     * Removes domains that overlap by more than max_allowed_overlap positions with
+     * better-supported domains and, optionally, domains that are completely engulfed by
+     * the coverage of better-supported domains.
+     * 
+     * Examples regarding engulfment (dashes indicate domain extent, numbers are confidence
+     * values):
+     * 
+     * ------------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
+     * 
+     * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
+     * 
+     * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
+     * ignored
+     * 
+     * @param max_allowed_overlap
+     *            maximum overlap (inclusive) for which two domains are still considered
+     *            non-overlapping (a negative value allows any overlap)
+     * @param remove_engulfed_domains
+     *            whether to remove domains which are completely engulfed by the coverage
+     *            of domains with better support
+     * @param protein the protein whose domains are to be pruned
+     * @return a new protein containing only the retained domains
+     */
+    public static Protein removeOverlappingDomains( final int max_allowed_overlap,
+                                                    final boolean remove_engulfed_domains,
+                                                    final Protein protein ) {
+        final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
+                .getSpeciesId() );
+        final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
+        final List<Boolean> covered_positions = new ArrayList<Boolean>();
+        for( final Domain domain : sorted ) {
+            if ( ( ( max_allowed_overlap < 0 ) || ( SurfacingUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) )
+                    && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
+                final int covered_positions_size = covered_positions.size();
+                for( int i = covered_positions_size; i < domain.getFrom(); ++i ) {
+                    covered_positions.add( false );
+                }
+                final int new_covered_positions_size = covered_positions.size();
+                for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+                    if ( i < new_covered_positions_size ) {
+                        covered_positions.set( i, true );
+                    }
+                    else {
+                        covered_positions.add( true );
+                    }
+                }
+                pruned_protein.addProteinDomain( domain );
+            }
+        }
+        return pruned_protein;
+    }
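+    // Usage sketch for removeOverlappingDomains (illustrative; "protein" stands for a
+    // hypothetical Protein instance): disallow any overlap and drop engulfed domains:
+    //
+    //   final Protein pruned = removeOverlappingDomains( 0, true, protein );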
+
+    static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
+        final List<Domain> domains = new ArrayList<Domain>();
+        for( final Domain d : protein.getProteinDomains() ) {
+            domains.add( d );
+        }
+        Collections.sort( domains, SurfacingUtil.ASCENDING_CONFIDENCE_VALUE_ORDER );
+        return domains;
+    }
+
+    public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p,
+                                                            final boolean get_gains,
+                                                            final String outdir,
+                                                            final String suffix_for_filename ) throws IOException {
+        CharacterStateMatrix.GainLossStates state = CharacterStateMatrix.GainLossStates.GAIN;
+        if ( !get_gains ) {
+            state = CharacterStateMatrix.GainLossStates.LOSS;
+        }
+        final File base_dir = createBaseDirForPerNodeDomainFiles( surfacing_old.BASE_DIRECTORY_PER_SUBTREE_DOMAIN_GAIN_LOSS_FILES,
+                                                                  false,
+                                                                  state,
+                                                                  outdir );
+        for( final PhylogenyNodeIterator it = p.iteratorPostorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            if ( !node.isExternal() ) {
+                final SortedSet<String> domains = collectAllDomainsChangedOnSubtree( node, get_gains );
+                if ( domains.size() > 0 ) {
+                    final Writer writer = ForesterUtil.createBufferedWriter( base_dir + ForesterUtil.FILE_SEPARATOR
+                            + node.getName() + suffix_for_filename );
+                    for( final String domain : domains ) {
+                        writer.write( domain );
+                        writer.write( ForesterUtil.LINE_SEPARATOR );
+                    }
+                    writer.close();
+                }
+            }
+        }
+    }
+
+    private static void writeAllEncounteredPfamsToFile( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                        final Map<GoId, GoTerm> go_id_to_term_map,
+                                                        final String outfile_name,
+                                                        final SortedSet<String> all_pfams_encountered ) {
+        final File all_pfams_encountered_file = new File( outfile_name + surfacing_old.ALL_PFAMS_ENCOUNTERED_SUFFIX );
+        final File all_pfams_encountered_with_go_annotation_file = new File( outfile_name
+                + surfacing_old.ALL_PFAMS_ENCOUNTERED_WITH_GO_ANNOTATION_SUFFIX );
+        final File encountered_pfams_summary_file = new File( outfile_name
+                + surfacing_old.ENCOUNTERED_PFAMS_SUMMARY_SUFFIX );
+        int biological_process_counter = 0;
+        int cellular_component_counter = 0;
+        int molecular_function_counter = 0;
+        int pfams_with_mappings_counter = 0;
+        int pfams_without_mappings_counter = 0;
+        int pfams_without_mappings_to_bp_or_mf_counter = 0;
+        int pfams_with_mappings_to_bp_or_mf_counter = 0;
+        try {
+            final Writer all_pfams_encountered_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_file ) );
+            final Writer all_pfams_encountered_with_go_annotation_writer = new BufferedWriter( new FileWriter( all_pfams_encountered_with_go_annotation_file ) );
+            final Writer summary_writer = new BufferedWriter( new FileWriter( encountered_pfams_summary_file ) );
+            summary_writer.write( "# Pfam to GO mapping summary" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Actual summary is at the end of this file." );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Encountered Pfams without a GO mapping:" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            for( final String pfam : all_pfams_encountered ) {
+                all_pfams_encountered_writer.write( pfam );
+                all_pfams_encountered_writer.write( ForesterUtil.LINE_SEPARATOR );
+                final DomainId domain_id = new DomainId( pfam );
+                if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
+                    ++pfams_with_mappings_counter;
+                    all_pfams_encountered_with_go_annotation_writer.write( pfam );
+                    all_pfams_encountered_with_go_annotation_writer.write( ForesterUtil.LINE_SEPARATOR );
+                    final List<GoId> go_ids = domain_id_to_go_ids_map.get( domain_id );
+                    boolean maps_to_bp = false;
+                    boolean maps_to_cc = false;
+                    boolean maps_to_mf = false;
+                    for( final GoId go_id : go_ids ) {
+                        final GoTerm go_term = go_id_to_term_map.get( go_id );
+                        if ( go_term.getGoNameSpace().isBiologicalProcess() ) {
+                            maps_to_bp = true;
+                        }
+                        else if ( go_term.getGoNameSpace().isCellularComponent() ) {
+                            maps_to_cc = true;
+                        }
+                        else if ( go_term.getGoNameSpace().isMolecularFunction() ) {
+                            maps_to_mf = true;
+                        }
+                    }
+                    if ( maps_to_bp ) {
+                        ++biological_process_counter;
+                    }
+                    if ( maps_to_cc ) {
+                        ++cellular_component_counter;
+                    }
+                    if ( maps_to_mf ) {
+                        ++molecular_function_counter;
+                    }
+                    if ( maps_to_bp || maps_to_mf ) {
+                        ++pfams_with_mappings_to_bp_or_mf_counter;
+                    }
+                    else {
+                        ++pfams_without_mappings_to_bp_or_mf_counter;
+                    }
+                }
+                else {
+                    ++pfams_without_mappings_to_bp_or_mf_counter;
+                    ++pfams_without_mappings_counter;
+                    summary_writer.write( pfam );
+                    summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+                }
+            }
+            all_pfams_encountered_writer.close();
+            all_pfams_encountered_with_go_annotation_writer.close();
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote all [" + all_pfams_encountered.size()
+                    + "] encountered Pfams to: \"" + all_pfams_encountered_file + "\"" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote all [" + pfams_with_mappings_counter
+                    + "] encountered Pfams with GO mappings to: \"" + all_pfams_encountered_with_go_annotation_file
+                    + "\"" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote summary (including all ["
+                    + pfams_without_mappings_counter + "] encountered Pfams without GO mappings) to: \""
+                    + encountered_pfams_summary_file + "\"" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Sum of Pfams encountered                : "
+                    + all_pfams_encountered.size() );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams without a mapping                 : "
+                    + pfams_without_mappings_counter + " ["
+                    + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams without mapping to proc. or func. : "
+                    + pfams_without_mappings_to_bp_or_mf_counter + " ["
+                    + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams with a mapping                    : "
+                    + pfams_with_mappings_counter + " ["
+                    + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams with a mapping to proc. or func.  : "
+                    + pfams_with_mappings_to_bp_or_mf_counter + " ["
+                    + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams with mapping to biological process: "
+                    + biological_process_counter + " ["
+                    + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams with mapping to molecular function: "
+                    + molecular_function_counter + " ["
+                    + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Pfams with mapping to cellular component: "
+                    + cellular_component_counter + " ["
+                    + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Sum of Pfams encountered                : " + all_pfams_encountered.size() );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams without a mapping                 : " + pfams_without_mappings_counter
+                    + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams without mapping to proc. or func. : "
+                    + pfams_without_mappings_to_bp_or_mf_counter + " ["
+                    + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams with a mapping                    : " + pfams_with_mappings_counter + " ["
+                    + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams with a mapping to proc. or func.  : "
+                    + pfams_with_mappings_to_bp_or_mf_counter + " ["
+                    + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " ["
+                    + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " ["
+                    + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " ["
+                    + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" );
+            summary_writer.write( ForesterUtil.LINE_SEPARATOR );
+            summary_writer.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.printWarningMessage( surfacing_old.PRG_NAME, "Failure to write: " + e );
+        }
+    }
+
+    public static void writeBinaryDomainCombinationsFileForGraphAnalysis( final String[][] input_file_properties,
+                                                                          final File output_dir,
+                                                                          final GenomeWideCombinableDomains gwcd,
+                                                                          final int i,
+                                                                          final GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
+        File dc_outfile_dot = new File( input_file_properties[ i ][ 0 ]
+                + surfacing_old.DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS );
+        if ( output_dir != null ) {
+            dc_outfile_dot = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile_dot );
+        }
+        checkForOutputFileWriteability( dc_outfile_dot );
+        final SortedSet<BinaryDomainCombination> binary_combinations = createSetOfAllBinaryDomainCombinationsPerGenome( gwcd );
+        try {
+            final BufferedWriter out_dot = new BufferedWriter( new FileWriter( dc_outfile_dot ) );
+            for( final BinaryDomainCombination bdc : binary_combinations ) {
+                out_dot.write( bdc.toGraphDescribingLanguage( BinaryDomainCombination.OutputFormat.DOT, null, null )
+                        .toString() );
+                out_dot.write( SurfacingConstants.NL );
+            }
+            out_dot.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote binary domain combination for \""
+                + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
+                + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+    }
+
+    /*
+     * Output columns (delimited by the given separator):
+     * species | protein id | n-terminal domain | c-terminal domain |
+     * n-terminal domain per-domain E-value | c-terminal domain per-domain E-value |
+     * n-terminal domain count in protein | c-terminal domain count in protein
+     */
+    static public StringBuffer proteinToDomainCombinations( final Protein protein,
+                                                            final String protein_id,
+                                                            final String separator ) {
+        final StringBuffer sb = new StringBuffer();
+        if ( protein.getSpecies() == null ) {
+            throw new IllegalArgumentException( "species must not be null" );
+        }
+        if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) {
+            throw new IllegalArgumentException( "species id must not be empty" );
+        }
+        final List<Domain> domains = protein.getProteinDomains();
+        if ( domains.size() > 1 ) {
+            final Map<String, Integer> counts = new HashMap<String, Integer>();
+            for( final Domain domain : domains ) {
+                final String id = domain.getDomainId().getId();
+                if ( counts.containsKey( id ) ) {
+                    counts.put( id, counts.get( id ) + 1 );
+                }
+                else {
+                    counts.put( id, 1 );
+                }
+            }
+            for( int i = 1; i < domains.size(); ++i ) {
+                for( int j = 0; j < i; ++j ) {
+                    Domain domain_n = domains.get( i );
+                    Domain domain_c = domains.get( j );
+                    if ( domain_n.getFrom() > domain_c.getFrom() ) {
+                        domain_n = domains.get( j );
+                        domain_c = domains.get( i );
+                    }
+                    sb.append( protein.getSpecies() );
+                    sb.append( separator );
+                    sb.append( protein_id );
+                    sb.append( separator );
+                    sb.append( domain_n.getDomainId().getId() );
+                    sb.append( separator );
+                    sb.append( domain_c.getDomainId().getId() );
+                    sb.append( separator );
+                    sb.append( domain_n.getPerDomainEvalue() );
+                    sb.append( separator );
+                    sb.append( domain_c.getPerDomainEvalue() );
+                    sb.append( separator );
+                    sb.append( counts.get( domain_n.getDomainId().getId() ) );
+                    sb.append( separator );
+                    sb.append( counts.get( domain_c.getDomainId().getId() ) );
+                    sb.append( ForesterUtil.LINE_SEPARATOR );
+                }
+            }
+        }
+        else if ( domains.size() == 1 ) {
+            sb.append( protein.getSpecies() );
+            sb.append( separator );
+            sb.append( protein_id );
+            sb.append( separator );
+            sb.append( domains.get( 0 ).getDomainId().getId() );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( domains.get( 0 ).getPerDomainEvalue() );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( 1 );
+            sb.append( separator );
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        else {
+            sb.append( protein.getSpecies() );
+            sb.append( separator );
+            sb.append( protein_id );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( separator );
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        return sb;
+    }
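+    // Example for proteinToDomainCombinations (illustrative, hypothetical domain ids): a
+    // protein with domain A at positions 10-50 (per-domain E-value 1e-30) and domain B at
+    // positions 60-90 (1e-10) yields a single row:
+    //   species, protein id, A, B, 1e-30, 1e-10, 1, 1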
+
+    public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
+                                                            final CharacterStateMatrix.GainLossStates state,
+                                                            final String filename,
+                                                            final String indentifier_characters_separator,
+                                                            final String character_separator,
+                                                            final Map<String, String> descriptions ) {
+        final File outfile = new File( filename );
+        checkForOutputFileWriteability( outfile );
+        final SortedSet<String> sorted_ids = new TreeSet<String>();
+        for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
+            sorted_ids.add( matrix.getIdentifier( i ) );
+        }
+        try {
+            final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
+            for( final String id : sorted_ids ) {
+                out.write( indentifier_characters_separator );
+                out.write( "#" + id );
+                out.write( indentifier_characters_separator );
+                for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
+                    // Not nice:
+                    // using null to indicate either UNCHANGED_PRESENT or GAIN.
+                    if ( ( matrix.getState( id, c ) == state )
+                            || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
+                                    .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
+                        out.write( matrix.getCharacter( c ) );
+                        if ( ( descriptions != null ) && !descriptions.isEmpty()
+                                && descriptions.containsKey( matrix.getCharacter( c ) ) ) {
+                            out.write( "\t" );
+                            out.write( descriptions.get( matrix.getCharacter( c ) ) );
+                        }
+                        out.write( character_separator );
+                    }
+                }
+            }
+            out.flush();
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
+    }
+
+    public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
+                                                                                                 final CharacterStateMatrix.GainLossStates state,
+                                                                                                 final String filename,
+                                                                                                 final String indentifier_characters_separator,
+                                                                                                 final String character_separator,
+                                                                                                 final BinaryDomainCombination.OutputFormat bc_output_format ) {
+        final File outfile = new File( filename );
+        checkForOutputFileWriteability( outfile );
+        final SortedSet<String> sorted_ids = new TreeSet<String>();
+        for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
+            sorted_ids.add( matrix.getIdentifier( i ) );
+        }
+        try {
+            final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
+            for( final String id : sorted_ids ) {
+                out.write( indentifier_characters_separator );
+                out.write( "#" + id );
+                out.write( indentifier_characters_separator );
+                for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
+                    // Not nice:
+                    // using null to indicate either UNCHANGED_PRESENT or GAIN.
+                    if ( ( matrix.getState( id, c ) == state )
+                            || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) || ( matrix
+                                    .getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
+                        BinaryDomainCombination bdc = null;
+                        try {
+                            bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
+                        }
+                        catch ( final Exception e ) {
+                            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getLocalizedMessage() );
+                        }
+                        out.write( bdc.toGraphDescribingLanguage( bc_output_format, null, null ).toString() );
+                        out.write( character_separator );
+                    }
+                }
+            }
+            out.flush();
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters list: \"" + filename + "\"" );
+    }
+
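+    /**
+     * Writes a detailed HTML list of all characters (domains or binary domain combinations)
+     * that are in the given gain/loss state for each identifier of the matrix, together
+     * with their GO annotations, and additionally writes the gained or lost domains to one
+     * file per tree node (a null state selects characters that are GAIN or
+     * UNCHANGED_PRESENT).
+     */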
+    public static void writeBinaryStatesMatrixToList( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                      final Map<GoId, GoTerm> go_id_to_term_map,
+                                                      final GoNameSpace go_namespace_limit,
+                                                      final boolean domain_combinations,
+                                                      final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
+                                                      final CharacterStateMatrix.GainLossStates state,
+                                                      final String filename,
+                                                      final String indentifier_characters_separator,
+                                                      final String character_separator,
+                                                      final String title_for_html,
+                                                      final String prefix_for_html,
+                                                      final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
+                                                      final SortedSet<String> all_pfams_encountered,
+                                                      final SortedSet<String> pfams_gained_or_lost,
+                                                      final String suffix_for_per_node_events_file ) {
+        if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
+            throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
+        }
+        else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
+            throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
+        }
+        else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
+            throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
+        }
+        final File outfile = new File( filename );
+        checkForOutputFileWriteability( outfile );
+        final SortedSet<String> sorted_ids = new TreeSet<String>();
+        for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
+            sorted_ids.add( matrix.getIdentifier( i ) );
+        }
+        try {
+            final Writer out = new BufferedWriter( new FileWriter( outfile ) );
+            final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing_old.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
+                                                                                                                domain_combinations,
+                                                                                                                state,
+                                                                                                                filename );
+            Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
+            File per_node_go_mapped_domain_gain_loss_outfile = null;
+            int per_node_counter = 0;
+            out.write( "<html>" );
+            out.write( SurfacingConstants.NL );
+            addHtmlHead( out, title_for_html );
+            out.write( SurfacingConstants.NL );
+            out.write( "<body>" );
+            out.write( SurfacingConstants.NL );
+            out.write( "<h1>" );
+            out.write( SurfacingConstants.NL );
+            out.write( title_for_html );
+            out.write( SurfacingConstants.NL );
+            out.write( "</h1>" );
+            out.write( SurfacingConstants.NL );
+            out.write( "<table>" );
+            out.write( SurfacingConstants.NL );
+            for( final String id : sorted_ids ) {
+                final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
+                if ( matcher.matches() ) {
+                    continue;
+                }
+                out.write( "<tr>" );
+                out.write( "<td>" );
+                out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
+                out.write( "</td>" );
+                out.write( "</tr>" );
+                out.write( SurfacingConstants.NL );
+            }
+            out.write( "</table>" );
+            out.write( SurfacingConstants.NL );
+            for( final String id : sorted_ids ) {
+                final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( id );
+                if ( matcher.matches() ) {
+                    continue;
+                }
+                out.write( SurfacingConstants.NL );
+                out.write( "<h2>" );
+                out.write( "<a name=\"" + id + "\">" + id + "</a>" );
+                writeTaxonomyLinks( out, id );
+                out.write( "</h2>" );
+                out.write( SurfacingConstants.NL );
+                out.write( "<table>" );
+                out.write( SurfacingConstants.NL );
+                out.write( "<tr>" );
+                out.write( "<td><b>" );
+                out.write( "Pfam domain(s)" );
+                out.write( "</b></td><td><b>" );
+                out.write( "GO term acc" );
+                out.write( "</b></td><td><b>" );
+                out.write( "GO term" );
+                out.write( "</b></td><td><b>" );
+                out.write( "GO namespace" );
+                out.write( "</b></td>" );
+                out.write( "</tr>" );
+                out.write( SurfacingConstants.NL );
+                per_node_counter = 0;
+                if ( matrix.getNumberOfCharacters() > 0 ) {
+                    per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                            + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
+                    SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
+                    per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
+                            .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
+                }
+                else {
+                    per_node_go_mapped_domain_gain_loss_outfile = null;
+                    per_node_go_mapped_domain_gain_loss_outfile_writer = null;
+                }
+                for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
+                    // Not nice:
+                    // using null to indicate either UNCHANGED_PRESENT or GAIN.
+                    if ( ( matrix.getState( id, c ) == state )
+                            || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
+                                    .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
+                        final String character = matrix.getCharacter( c );
+                        String domain_0 = "";
+                        String domain_1 = "";
+                        if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
+                            final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
+                            if ( s.length != 2 ) {
+                                throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
+                                        + character + "]" );
+                            }
+                            domain_0 = s[ 0 ];
+                            domain_1 = s[ 1 ];
+                        }
+                        else {
+                            domain_0 = character;
+                        }
+                        writeDomainData( domain_id_to_go_ids_map,
+                                         go_id_to_term_map,
+                                         go_namespace_limit,
+                                         out,
+                                         domain_0,
+                                         domain_1,
+                                         prefix_for_html,
+                                         character_separator,
+                                         domain_id_to_secondary_features_maps,
+                                         null );
+                        all_pfams_encountered.add( domain_0 );
+                        if ( pfams_gained_or_lost != null ) {
+                            pfams_gained_or_lost.add( domain_0 );
+                        }
+                        if ( !ForesterUtil.isEmpty( domain_1 ) ) {
+                            all_pfams_encountered.add( domain_1 );
+                            if ( pfams_gained_or_lost != null ) {
+                                pfams_gained_or_lost.add( domain_1 );
+                            }
+                        }
+                        if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
+                            writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
+                                                                     domain_0,
+                                                                     domain_1 );
+                            per_node_counter++;
+                        }
+                    }
+                }
+                if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
+                    per_node_go_mapped_domain_gain_loss_outfile_writer.close();
+                    if ( per_node_counter < 1 ) {
+                        per_node_go_mapped_domain_gain_loss_outfile.delete();
+                    }
+                    per_node_counter = 0;
+                }
+                out.write( "</table>" );
+                out.write( SurfacingConstants.NL );
+                out.write( "<hr>" );
+                out.write( SurfacingConstants.NL );
+            } // for( final String id : sorted_ids )
+            out.write( "</body>" );
+            out.write( SurfacingConstants.NL );
+            out.write( "</html>" );
+            out.write( SurfacingConstants.NL );
+            out.flush();
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename
+                + "\"" );
+    }
+
+    public static void writeBinaryStatesMatrixToListORIGIG( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                                            final Map<GoId, GoTerm> go_id_to_term_map,
+                                                            final GoNameSpace go_namespace_limit,
+                                                            final boolean domain_combinations,
+                                                            final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
+                                                            final CharacterStateMatrix.GainLossStates state,
+                                                            final String filename,
+                                                            final String indentifier_characters_separator,
+                                                            final String character_separator,
+                                                            final String title_for_html,
+                                                            final String prefix_for_html,
+                                                            final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
+                                                            final SortedSet<String> all_pfams_encountered,
+                                                            final SortedSet<String> pfams_gained_or_lost,
+                                                            final String suffix_for_per_node_events_file ) {
+        if ( ( go_namespace_limit != null ) && ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
+            throw new IllegalArgumentException( "attempt to use GO namespace limit without a GO-id to term map" );
+        }
+        else if ( ( ( domain_id_to_go_ids_map == null ) || ( domain_id_to_go_ids_map.size() < 1 ) ) ) {
+            throw new IllegalArgumentException( "attempt to output detailed HTML without a Pfam to GO map" );
+        }
+        else if ( ( ( go_id_to_term_map == null ) || ( go_id_to_term_map.size() < 1 ) ) ) {
+            throw new IllegalArgumentException( "attempt to output detailed HTML without a GO-id to term map" );
+        }
+        final File outfile = new File( filename );
+        checkForOutputFileWriteability( outfile );
+        final SortedSet<String> sorted_ids = new TreeSet<String>();
+        for( int i = 0; i < matrix.getNumberOfIdentifiers(); ++i ) {
+            sorted_ids.add( matrix.getIdentifier( i ) );
+        }
+        try {
+            final Writer out = new BufferedWriter( new FileWriter( outfile ) );
+            final File per_node_go_mapped_domain_gain_loss_files_base_dir = createBaseDirForPerNodeDomainFiles( surfacing_old.BASE_DIRECTORY_PER_NODE_DOMAIN_GAIN_LOSS_FILES,
+                                                                                                                domain_combinations,
+                                                                                                                state,
+                                                                                                                filename );
+            Writer per_node_go_mapped_domain_gain_loss_outfile_writer = null;
+            File per_node_go_mapped_domain_gain_loss_outfile = null;
+            int per_node_counter = 0;
+            out.write( "<html>" );
+            out.write( SurfacingConstants.NL );
+            addHtmlHead( out, title_for_html );
+            out.write( SurfacingConstants.NL );
+            out.write( "<body>" );
+            out.write( SurfacingConstants.NL );
+            out.write( "<h1>" );
+            out.write( SurfacingConstants.NL );
+            out.write( title_for_html );
+            out.write( SurfacingConstants.NL );
+            out.write( "</h1>" );
+            out.write( SurfacingConstants.NL );
+            out.write( "<table>" );
+            out.write( SurfacingConstants.NL );
+            for( final String id : sorted_ids ) {
+                out.write( "<tr>" );
+                out.write( "<td>" );
+                out.write( "<a href=\"#" + id + "\">" + id + "</a>" );
+                writeTaxonomyLinks( out, id );
+                out.write( "</td>" );
+                out.write( "</tr>" );
+                out.write( SurfacingConstants.NL );
+            }
+            out.write( "</table>" );
+            out.write( SurfacingConstants.NL );
+            for( final String id : sorted_ids ) {
+                out.write( SurfacingConstants.NL );
+                out.write( "<h2>" );
+                out.write( "<a name=\"" + id + "\">" + id + "</a>" );
+                writeTaxonomyLinks( out, id );
+                out.write( "</h2>" );
+                out.write( SurfacingConstants.NL );
+                out.write( "<table>" );
+                out.write( SurfacingConstants.NL );
+                out.write( "<tr>" );
+                out.write( "<td><b>" );
+                out.write( "Pfam domain(s)" );
+                out.write( "</b></td><td><b>" );
+                out.write( "GO term acc" );
+                out.write( "</b></td><td><b>" );
+                out.write( "GO term" );
+                out.write( "</b></td><td><b>" );
+                out.write( "Penultimate GO term" );
+                out.write( "</b></td><td><b>" );
+                out.write( "GO namespace" );
+                out.write( "</b></td>" );
+                out.write( "</tr>" );
+                out.write( SurfacingConstants.NL );
+                per_node_counter = 0;
+                if ( matrix.getNumberOfCharacters() > 0 ) {
+                    per_node_go_mapped_domain_gain_loss_outfile = new File( per_node_go_mapped_domain_gain_loss_files_base_dir
+                            + ForesterUtil.FILE_SEPARATOR + id + suffix_for_per_node_events_file );
+                    SurfacingUtil.checkForOutputFileWriteability( per_node_go_mapped_domain_gain_loss_outfile );
+                    per_node_go_mapped_domain_gain_loss_outfile_writer = ForesterUtil
+                            .createBufferedWriter( per_node_go_mapped_domain_gain_loss_outfile );
+                }
+                else {
+                    per_node_go_mapped_domain_gain_loss_outfile = null;
+                    per_node_go_mapped_domain_gain_loss_outfile_writer = null;
+                }
+                for( int c = 0; c < matrix.getNumberOfCharacters(); ++c ) {
+                    // Not nice:
+                    // using null to indicate either UNCHANGED_PRESENT or GAIN.
+                    if ( ( matrix.getState( id, c ) == state )
+                            || ( ( state == null ) && ( ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) || ( matrix
+                                    .getState( id, c ) == CharacterStateMatrix.GainLossStates.GAIN ) ) ) ) {
+                        final String character = matrix.getCharacter( c );
+                        String domain_0 = "";
+                        String domain_1 = "";
+                        if ( character.indexOf( BinaryDomainCombination.SEPARATOR ) > 0 ) {
+                            final String[] s = character.split( BinaryDomainCombination.SEPARATOR );
+                            if ( s.length != 2 ) {
+                                throw new AssertionError( "this should not have happened: unexpected format for domain combination: ["
+                                        + character + "]" );
+                            }
+                            domain_0 = s[ 0 ];
+                            domain_1 = s[ 1 ];
+                        }
+                        else {
+                            domain_0 = character;
+                        }
+                        writeDomainData( domain_id_to_go_ids_map,
+                                         go_id_to_term_map,
+                                         go_namespace_limit,
+                                         out,
+                                         domain_0,
+                                         domain_1,
+                                         prefix_for_html,
+                                         character_separator,
+                                         domain_id_to_secondary_features_maps,
+                                         null );
+                        all_pfams_encountered.add( domain_0 );
+                        if ( pfams_gained_or_lost != null ) {
+                            pfams_gained_or_lost.add( domain_0 );
+                        }
+                        if ( !ForesterUtil.isEmpty( domain_1 ) ) {
+                            all_pfams_encountered.add( domain_1 );
+                            if ( pfams_gained_or_lost != null ) {
+                                pfams_gained_or_lost.add( domain_1 );
+                            }
+                        }
+                        if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
+                            writeDomainsToIndividualFilePerTreeNode( per_node_go_mapped_domain_gain_loss_outfile_writer,
+                                                                     domain_0,
+                                                                     domain_1 );
+                            per_node_counter++;
+                        }
+                    }
+                }
+                if ( per_node_go_mapped_domain_gain_loss_outfile_writer != null ) {
+                    per_node_go_mapped_domain_gain_loss_outfile_writer.close();
+                    if ( per_node_counter < 1 ) {
+                        per_node_go_mapped_domain_gain_loss_outfile.delete();
+                    }
+                    per_node_counter = 0;
+                }
+                out.write( "</table>" );
+                out.write( SurfacingConstants.NL );
+                out.write( "<hr>" );
+                out.write( SurfacingConstants.NL );
+            } // for( final String id : sorted_ids )
+            out.write( "</body>" );
+            out.write( SurfacingConstants.NL );
+            out.write( "</html>" );
+            out.write( SurfacingConstants.NL );
+            out.flush();
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote characters detailed HTML list: \"" + filename
+                + "\"" );
+    }
+
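+    /**
+     * Writes the genome's domain combination counts (in the requested sort order) to a
+     * per-genome output file and appends one tab-separated row of domain promiscuity
+     * statistics (mean, sample standard deviation or "n/a", median, min, max, N, and the
+     * most promiscuous domain ids) to the shared per-genome statistics writer.
+     */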
+    public static void writeDomainCombinationsCountsFile( final String[][] input_file_properties,
+                                                          final File output_dir,
+                                                          final Writer per_genome_domain_promiscuity_statistics_writer,
+                                                          final GenomeWideCombinableDomains gwcd,
+                                                          final int i,
+                                                          final GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order ) {
+        File dc_outfile = new File( input_file_properties[ i ][ 0 ]
+                + surfacing_old.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX );
+        if ( output_dir != null ) {
+            dc_outfile = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile );
+        }
+        checkForOutputFileWriteability( dc_outfile );
+        try {
+            final BufferedWriter out = new BufferedWriter( new FileWriter( dc_outfile ) );
+            out.write( gwcd.toStringBuilder( dc_sort_order ).toString() );
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        final DescriptiveStatistics stats = gwcd.getPerGenomeDomainPromiscuityStatistics();
+        try {
+            per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 0 ] + "\t" );
+            per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.arithmeticMean() ) + "\t" );
+            if ( stats.getN() < 2 ) {
+                per_genome_domain_promiscuity_statistics_writer.write( "n/a" + "\t" );
+            }
+            else {
+                per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats
+                        .sampleStandardDeviation() )
+                        + "\t" );
+            }
+            per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.median() ) + "\t" );
+            per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMin() + "\t" );
+            per_genome_domain_promiscuity_statistics_writer.write( ( int ) stats.getMax() + "\t" );
+            per_genome_domain_promiscuity_statistics_writer.write( stats.getN() + "\t" );
+            final SortedSet<DomainId> mpds = gwcd.getMostPromiscuosDomain();
+            for( final DomainId mpd : mpds ) {
+                per_genome_domain_promiscuity_statistics_writer.write( mpd.getId() + " " );
+            }
+            per_genome_domain_promiscuity_statistics_writer.write( ForesterUtil.LINE_SEPARATOR );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        if ( input_file_properties[ i ].length == 3 ) {
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote domain combination counts for \""
+                    + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
+                    + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile + "\"" );
+        }
+        else {
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote domain combination counts for \""
+                    + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
+                    + dc_outfile + "\"" );
+        }
+    }
+
+    private static void writeDomainData( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                         final Map<GoId, GoTerm> go_id_to_term_map,
+                                         final GoNameSpace go_namespace_limit,
+                                         final Writer out,
+                                         final String domain_0,
+                                         final String domain_1,
+                                         final String prefix_for_html,
+                                         final String character_separator_for_non_html_output,
+                                         final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
+                                         final Set<GoId> all_go_ids ) throws IOException {
+        boolean any_go_annotation_present = false;
+        boolean first_has_no_go = false;
+        int domain_count = 2; // To distinguish between domains and binary domain combinations.
+        if ( ForesterUtil.isEmpty( domain_1 ) ) {
+            domain_count = 1;
+        }
+        // Note: the logic below is somewhat convoluted; d == 0 handles domain_0, d == 1 handles domain_1,
+        // and the leading domain-id cell is written only on the first row that has a GO annotation.
+        for( int d = 0; d < domain_count; ++d ) {
+            List<GoId> go_ids = null;
+            boolean go_annotation_present = false;
+            if ( d == 0 ) {
+                final DomainId domain_id = new DomainId( domain_0 );
+                if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
+                    go_annotation_present = true;
+                    any_go_annotation_present = true;
+                    go_ids = domain_id_to_go_ids_map.get( domain_id );
+                }
+                else {
+                    first_has_no_go = true;
+                }
+            }
+            else {
+                final DomainId domain_id = new DomainId( domain_1 );
+                if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
+                    go_annotation_present = true;
+                    any_go_annotation_present = true;
+                    go_ids = domain_id_to_go_ids_map.get( domain_id );
+                }
+            }
+            if ( go_annotation_present ) {
+                boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
+                for( final GoId go_id : go_ids ) {
+                    out.write( "<tr>" );
+                    if ( first ) {
+                        first = false;
+                        writeDomainIdsToHtml( out,
+                                              domain_0,
+                                              domain_1,
+                                              prefix_for_html,
+                                              domain_id_to_secondary_features_maps );
+                    }
+                    else {
+                        out.write( "<td></td>" );
+                    }
+                    if ( !go_id_to_term_map.containsKey( go_id ) ) {
+                        throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
+                    }
+                    final GoTerm go_term = go_id_to_term_map.get( go_id );
+                    if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
+                        // final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
+                        final String go_id_str = go_id.getId();
+                        out.write( "<td>" );
+                        out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
+                                + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
+                        out.write( "</td><td>" );
+                        out.write( go_term.getName() );
+                        if ( domain_count == 2 ) {
+                            out.write( " (" + d + ")" );
+                        }
+                        out.write( "</td><td>" );
+                        // out.write( top );
+                        // out.write( "</td><td>" );
+                        out.write( "[" );
+                        out.write( go_term.getGoNameSpace().toShortString() );
+                        out.write( "]" );
+                        out.write( "</td>" );
+                        if ( all_go_ids != null ) {
+                            all_go_ids.add( go_id );
+                        }
+                    }
+                    else {
+                        out.write( "<td>" );
+                        out.write( "</td><td>" );
+                        out.write( "</td><td>" );
+                        out.write( "</td><td>" );
+                        out.write( "</td>" );
+                    }
+                    out.write( "</tr>" );
+                    out.write( SurfacingConstants.NL );
+                }
+            }
+        } //  for( int d = 0; d < domain_count; ++d ) 
+        if ( !any_go_annotation_present ) {
+            out.write( "<tr>" );
+            writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
+            out.write( "<td>" );
+            out.write( "</td><td>" );
+            out.write( "</td><td>" );
+            out.write( "</td><td>" );
+            out.write( "</td>" );
+            out.write( "</tr>" );
+            out.write( SurfacingConstants.NL );
+        }
+    }
+
+    private static void writeDomainDataORIG( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                             final Map<GoId, GoTerm> go_id_to_term_map,
+                                             final GoNameSpace go_namespace_limit,
+                                             final Writer out,
+                                             final String domain_0,
+                                             final String domain_1,
+                                             final String prefix_for_html,
+                                             final String character_separator_for_non_html_output,
+                                             final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
+                                             final Set<GoId> all_go_ids ) throws IOException {
+        boolean any_go_annotation_present = false;
+        boolean first_has_no_go = false;
+        int domain_count = 2; // To distinguish between domains and binary domain combinations.
+        if ( ForesterUtil.isEmpty( domain_1 ) ) {
+            domain_count = 1;
+        }
+        // Note: the logic below is somewhat convoluted; d == 0 handles domain_0, d == 1 handles domain_1,
+        // and the leading domain-id cell is written only on the first row that has a GO annotation.
+        for( int d = 0; d < domain_count; ++d ) {
+            List<GoId> go_ids = null;
+            boolean go_annotation_present = false;
+            if ( d == 0 ) {
+                final DomainId domain_id = new DomainId( domain_0 );
+                if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
+                    go_annotation_present = true;
+                    any_go_annotation_present = true;
+                    go_ids = domain_id_to_go_ids_map.get( domain_id );
+                }
+                else {
+                    first_has_no_go = true;
+                }
+            }
+            else {
+                final DomainId domain_id = new DomainId( domain_1 );
+                if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
+                    go_annotation_present = true;
+                    any_go_annotation_present = true;
+                    go_ids = domain_id_to_go_ids_map.get( domain_id );
+                }
+            }
+            if ( go_annotation_present ) {
+                boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
+                for( final GoId go_id : go_ids ) {
+                    out.write( "<tr>" );
+                    if ( first ) {
+                        first = false;
+                        writeDomainIdsToHtml( out,
+                                              domain_0,
+                                              domain_1,
+                                              prefix_for_html,
+                                              domain_id_to_secondary_features_maps );
+                    }
+                    else {
+                        out.write( "<td></td>" );
+                    }
+                    if ( !go_id_to_term_map.containsKey( go_id ) ) {
+                        throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
+                    }
+                    final GoTerm go_term = go_id_to_term_map.get( go_id );
+                    if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
+                        final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
+                        final String go_id_str = go_id.getId();
+                        out.write( "<td>" );
+                        out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
+                                + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
+                        out.write( "</td><td>" );
+                        out.write( go_term.getName() );
+                        if ( domain_count == 2 ) {
+                            out.write( " (" + d + ")" );
+                        }
+                        out.write( "</td><td>" );
+                        out.write( top );
+                        out.write( "</td><td>" );
+                        out.write( "[" );
+                        out.write( go_term.getGoNameSpace().toShortString() );
+                        out.write( "]" );
+                        out.write( "</td>" );
+                        if ( all_go_ids != null ) {
+                            all_go_ids.add( go_id );
+                        }
+                    }
+                    else {
+                        out.write( "<td>" );
+                        out.write( "</td><td>" );
+                        out.write( "</td><td>" );
+                        out.write( "</td><td>" );
+                        out.write( "</td>" );
+                    }
+                    out.write( "</tr>" );
+                    out.write( SurfacingConstants.NL );
+                }
+            }
+        } //  for( int d = 0; d < domain_count; ++d ) 
+        if ( !any_go_annotation_present ) {
+            out.write( "<tr>" );
+            writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
+            out.write( "<td>" );
+            out.write( "</td><td>" );
+            out.write( "</td><td>" );
+            out.write( "</td><td>" );
+            out.write( "</td>" );
+            out.write( "</tr>" );
+            out.write( SurfacingConstants.NL );
+        }
+    }
+
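+    // Writes the leading table cell: an optional prefix followed by a link to the Pfam family page
+    // for domain_0.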
+    private static void writeDomainIdsToHtml( final Writer out,
+                                              final String domain_0,
+                                              final String domain_1,
+                                              final String prefix_for_detailed_html,
+                                              final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps )
+            throws IOException {
+        out.write( "<td>" );
+        if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
+            out.write( prefix_for_detailed_html );
+            out.write( " " );
+        }
+        out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
+        //if ( ForesterUtil.isEmpty( domain_1 ) ) {
+        //    out.write( " <a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + domain_0
+        //            + SurfacingConstants.GOOGLE_SCHOLAR_LIMITS + "\">[gs]</a>" );
+        //}
+        // if ( !ForesterUtil.isEmpty( domain_1 ) ) {
+        //     out.write( "=" );
+        //    out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_1 + "\">" + domain_1 + "</a>" );
+        //}
+        //        else if ( ( domain_id_to_secondary_features_maps != null )
+        //                && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
+        //            out.write( " [" );
+        //            boolean first = true;
+        //            for( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) {
+        //                final Set<String> sec_features = domain_id_to_secondary_features_map.get( new DomainId( domain_0 ) );
+        //                if ( ( sec_features != null ) && ( sec_features.size() > 0 ) ) {
+        //                    for( final String sec_feature : sec_features ) {
+        //                        if ( first ) {
+        //                            first = false;
+        //                        }
+        //                        else {
+        //                            out.write( ", " );
+        //                        }
+        //                        if ( SurfacingConstants.SECONDARY_FEATURES_ARE_SCOP
+        //                                && ( SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK != null ) ) {
+        //                            out.write( "<a href=\"" + SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK + sec_feature
+        //                                    + "\" target=\"scop_window\">" + sec_feature + "</a>" );
+        //                        }
+        //                        else {
+        //                            out.write( sec_feature );
+        //                        }
+        //                    }
+        //                }
+        //            }
+        //            out.write( "]" );
+        //        }
+        out.write( "</td>" );
+    }
+
+    private static void writeDomainIdsToHtmlORIG( final Writer out,
+                                                  final String domain_0,
+                                                  final String domain_1,
+                                                  final String prefix_for_detailed_html,
+                                                  final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps )
+            throws IOException {
+        out.write( "<td>" );
+        if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
+            out.write( prefix_for_detailed_html );
+            out.write( " " );
+        }
+        out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
+        if ( ForesterUtil.isEmpty( domain_1 ) ) {
+            out.write( " <a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + domain_0
+                    + SurfacingConstants.GOOGLE_SCHOLAR_LIMITS + "\">[gs]</a>" );
+        }
+        if ( !ForesterUtil.isEmpty( domain_1 ) ) {
+            out.write( "=" );
+            out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_1 + "\">" + domain_1 + "</a>" );
+        }
+        else if ( ( domain_id_to_secondary_features_maps != null )
+                && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
+            out.write( " [" );
+            boolean first = true;
+            for( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) {
+                final Set<String> sec_features = domain_id_to_secondary_features_map.get( new DomainId( domain_0 ) );
+                if ( ( sec_features != null ) && ( sec_features.size() > 0 ) ) {
+                    for( final String sec_feature : sec_features ) {
+                        if ( first ) {
+                            first = false;
+                        }
+                        else {
+                            out.write( ", " );
+                        }
+                        if ( SurfacingConstants.SECONDARY_FEATURES_ARE_SCOP
+                                && ( SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK != null ) ) {
+                            out.write( "<a href=\"" + SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK + sec_feature
+                                    + "\" target=\"scop_window\">" + sec_feature + "</a>" );
+                        }
+                        else {
+                            out.write( sec_feature );
+                        }
+                    }
+                }
+            }
+            out.write( "]" );
+        }
+        out.write( "</td>" );
+    }
+
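+    // Collects the per-similarity values selected by sort_field into descriptive statistics, optionally
+    // prints an ASCII histogram and summary statistics to stdout (verbose), and writes all similarities
+    // to w as plain tab-delimited lines or as an HTML page, depending on print_option. Flushes and
+    // closes w and returns the statistics.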
+    public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
+                                                                       final StringBuilder html_title,
+                                                                       final Writer w,
+                                                                       final SortedSet<DomainSimilarity> similarities,
+                                                                       final boolean treat_as_binary,
+                                                                       final List<Species> species_order,
+                                                                       final PrintableDomainSimilarity.PRINT_OPTION print_option,
+                                                                       final DomainSimilarity.DomainSimilaritySortField sort_field,
+                                                                       final DomainSimilarity.DomainSimilarityScoring scoring,
+                                                                       final boolean verbose ) throws IOException {
+        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+        String histogram_title = null;
+        switch ( sort_field ) {
+            case ABS_MAX_COUNTS_DIFFERENCE:
+                if ( treat_as_binary ) {
+                    histogram_title = "absolute counts difference:";
+                }
+                else {
+                    histogram_title = "absolute (maximal) counts difference:";
+                }
+                break;
+            case MAX_COUNTS_DIFFERENCE:
+                if ( treat_as_binary ) {
+                    histogram_title = "counts difference:";
+                }
+                else {
+                    histogram_title = "(maximal) counts difference:";
+                }
+                break;
+            case DOMAIN_ID:
+                histogram_title = "score mean:";
+                break;
+            case MIN:
+                histogram_title = "score minimum:";
+                break;
+            case MAX:
+                histogram_title = "score maximum:";
+                break;
+            case MAX_DIFFERENCE:
+                if ( treat_as_binary ) {
+                    histogram_title = "difference:";
+                }
+                else {
+                    histogram_title = "(maximal) difference:";
+                }
+                break;
+            case MEAN:
+                histogram_title = "score mean:";
+                break;
+            case SD:
+                histogram_title = "score standard deviation:";
+                break;
+            case SPECIES_COUNT:
+                histogram_title = "species number:";
+                break;
+            default:
+                throw new AssertionError( "Unknown sort field: " + sort_field );
+        }
+        for( final DomainSimilarity similarity : similarities ) {
+            switch ( sort_field ) {
+                case ABS_MAX_COUNTS_DIFFERENCE:
+                    stats.addValue( Math.abs( similarity.getMaximalDifferenceInCounts() ) );
+                    break;
+                case MAX_COUNTS_DIFFERENCE:
+                    stats.addValue( similarity.getMaximalDifferenceInCounts() );
+                    break;
+                case DOMAIN_ID:
+                    stats.addValue( similarity.getMeanSimilarityScore() );
+                    break;
+                case MIN:
+                    stats.addValue( similarity.getMinimalSimilarityScore() );
+                    break;
+                case MAX:
+                    stats.addValue( similarity.getMaximalSimilarityScore() );
+                    break;
+                case MAX_DIFFERENCE:
+                    stats.addValue( similarity.getMaximalDifference() );
+                    break;
+                case MEAN:
+                    stats.addValue( similarity.getMeanSimilarityScore() );
+                    break;
+                case SD:
+                    stats.addValue( similarity.getStandardDeviationOfSimilarityScore() );
+                    break;
+                case SPECIES_COUNT:
+                    stats.addValue( similarity.getSpecies().size() );
+                    break;
+                default:
+                    throw new AssertionError( "Unknown sort field: " + sort_field );
+            }
+        }
+        // Retained (currently unused) code for displaying the values in a histogram frame:
+        // final HistogramData[] hists = new HistogramData[ 1 ];
+        // List<HistogramDataItem> data_items = new ArrayList<HistogramDataItem>();
+        // double[] values = stats.getDataAsDoubleArray();
+        // for( int i = 0; i < values.length; i++ ) {
+        //     HistogramDataItem data_item = new BasicHistogramDataItem( "", values[ i ] );
+        //     data_items.add( data_item );
+        // }
+        // HistogramData hd0 = new HistogramData( "name", data_items, null, 20, 40 );
+        // hists[ 0 ] = hd0;
+        // final HistogramsFrame hf = new HistogramsFrame( hists );
+        // hf.setVisible( true );
+        AsciiHistogram histo = null;
+        // Only build a histogram if the values actually have a spread (min < max).
+        if ( stats.getMin() < stats.getMax() ) {
+            histo = new AsciiHistogram( stats, histogram_title );
+        }
+        if ( verbose ) {
+            if ( histo != null ) {
+                System.out.println( histo.toStringBuffer( 20, '|', 40, 5 ) );
+            }
+            System.out.println();
+            System.out.println( "N                   : " + stats.getN() );
+            System.out.println( "Min                 : " + stats.getMin() );
+            System.out.println( "Max                 : " + stats.getMax() );
+            System.out.println( "Mean                : " + stats.arithmeticMean() );
+            if ( stats.getN() > 1 ) {
+                System.out.println( "SD                  : " + stats.sampleStandardDeviation() );
+            }
+            else {
+                System.out.println( "SD                  : n/a" );
+            }
+            System.out.println( "Median              : " + stats.median() );
+            if ( stats.getN() > 1 ) {
+                System.out.println( "Pearsonian skewness : " + stats.pearsonianSkewness() );
+            }
+            else {
+                System.out.println( "Pearsonian skewness : n/a" );
+            }
+        }
+        switch ( print_option ) {
+            case SIMPLE_TAB_DELIMITED:
+                break;
+            case HTML:
+                w.write( "<html>" );
+                w.write( SurfacingConstants.NL );
+                addHtmlHead( w, "SURFACING :: " + html_title );
+                w.write( SurfacingConstants.NL );
+                w.write( "<body>" );
+                w.write( SurfacingConstants.NL );
+                w.write( html_desc.toString() );
+                w.write( SurfacingConstants.NL );
+                w.write( "<hr>" );
+                w.write( "<br>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<tt><pre>" );
+                w.write( SurfacingConstants.NL );
+                if ( histo != null ) {
+                    w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+                    w.write( SurfacingConstants.NL );
+                }
+                w.write( "</pre></tt>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<table>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+                w.write( SurfacingConstants.NL );
+                if ( stats.getN() > 1 ) {
+                    w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+                }
+                else {
+                    w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+                }
+                w.write( SurfacingConstants.NL );
+                w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
+                w.write( SurfacingConstants.NL );
+                if ( stats.getN() > 1 ) {
+                    w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
+                }
+                else {
+                    w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
+                }
+                w.write( SurfacingConstants.NL );
+                w.write( "</table>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<br>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<hr>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<br>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "<table>" );
+                w.write( SurfacingConstants.NL );
+                break;
+        }
+        w.write( SurfacingConstants.NL );
+        for( final DomainSimilarity similarity : similarities ) {
+            if ( ( species_order != null ) && !species_order.isEmpty() ) {
+                ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+            }
+            w.write( similarity.toStringBuffer( print_option ).toString() );
+            w.write( SurfacingConstants.NL );
+        }
+        switch ( print_option ) {
+            case HTML:
+                w.write( SurfacingConstants.NL );
+                w.write( "</table>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "</font>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "</body>" );
+                w.write( SurfacingConstants.NL );
+                w.write( "</html>" );
+                w.write( SurfacingConstants.NL );
+                break;
+        }
+        w.flush();
+        w.close();
+        return stats;
+    }
+
+    private static void writeDomainsToIndividualFilePerTreeNode( final Writer individual_files_writer,
+                                                                 final String domain_0,
+                                                                 final String domain_1 ) throws IOException {
+        individual_files_writer.write( domain_0 );
+        individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
+        if ( !ForesterUtil.isEmpty( domain_1 ) ) {
+            individual_files_writer.write( domain_1 );
+            individual_files_writer.write( ForesterUtil.LINE_SEPARATOR );
+        }
+    }
+
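+    // Writes a character-state matrix to the named file in the given format.
+    // Example usage (hypothetical file name, for illustration only):
+    //   writeMatrixToFile( matrix, "domain_matrix.txt", format );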
+    public static void writeMatrixToFile( final CharacterStateMatrix<?> matrix,
+                                          final String filename,
+                                          final Format format ) {
+        final File outfile = new File( filename );
+        checkForOutputFileWriteability( outfile );
+        try {
+            final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) );
+            matrix.toWriter( out, format );
+            out.flush();
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote matrix: \"" + filename + "\"" );
+    }
+
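+    // Writes all given distance matrices to matrix_outfile, one after another, in PHYLIP format.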
+    public static void writeMatrixToFile( final File matrix_outfile, final List<DistanceMatrix> matrices ) {
+        checkForOutputFileWriteability( matrix_outfile );
+        try {
+            final BufferedWriter out = new BufferedWriter( new FileWriter( matrix_outfile ) );
+            for( final DistanceMatrix distance_matrix : matrices ) {
+                out.write( distance_matrix.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() );
+                out.write( ForesterUtil.LINE_SEPARATOR );
+                out.flush();
+            }
+            out.close();
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote distance matrices to \"" + matrix_outfile + "\"" );
+    }
+
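+    // Writes each Pfam id in the sorted set to outfile_name, one per line; an IOException is reported
+    // as a warning only.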
+    private static void writePfamsToFile( final String outfile_name, final SortedSet<String> pfams ) {
+        try {
+            final Writer writer = new BufferedWriter( new FileWriter( new File( outfile_name ) ) );
+            for( final String pfam : pfams ) {
+                writer.write( pfam );
+                writer.write( ForesterUtil.LINE_SEPARATOR );
+            }
+            writer.close();
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote " + pfams.size() + " pfams to [" + outfile_name
+                    + "]" );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.printWarningMessage( surfacing_old.PRG_NAME, "Failure to write: " + e );
+        }
+    }
+
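+    // Writes the phylogeny to filename in phyloXML format.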
+    public static void writePhylogenyToFile( final Phylogeny phylogeny, final String filename ) {
+        final PhylogenyWriter writer = new PhylogenyWriter();
+        try {
+            writer.toPhyloXML( new File( filename ), phylogeny, 1 );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.printWarningMessage( surfacing_old.PRG_NAME, "failed to write phylogeny to \"" + filename
+                    + "\": " + e );
+        }
+        ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote phylogeny to \"" + filename + "\"" );
+    }
+
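+    // Appends bracketed taxonomy links for the given species string: a UniProt taxonomy link if it
+    // matches PATTERN_SP_STYLE_TAXONOMY, otherwise EOL and Tree of Life links.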
+    public static void writeTaxonomyLinks( final Writer writer, final String species ) throws IOException {
+        if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
+            final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( species );
+            writer.write( " [" );
+            if ( matcher.matches() ) {
+                writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_LINK + species
+                        + "\" target=\"taxonomy_window\">uniprot</a>" );
+            }
+            else {
+                writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
+                        + "\" target=\"taxonomy_window\">eol</a>" );
+                writer.write( "|" );
+                writer.write( "<a href=\"" + SurfacingConstants.TOL_LINK + species
+                        + "\" target=\"taxonomy_window\">tol</a>" );
+            }
+            writer.write( "]" );
+        }
+    }
+
+    public static void writeTaxonomyLinksORIG( final Writer writer, final String species ) throws IOException {
+        if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
+            final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( species );
+            writer.write( " [" );
+            if ( matcher.matches() ) {
+                writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_LINK + species
+                        + "\" target=\"taxonomy_window\">uniprot</a>" );
+            }
+            else {
+                writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
+                        + "\" target=\"taxonomy_window\">eol</a>" );
+                writer.write( "|" );
+                writer.write( "<a href=\"" + SurfacingConstants.TOL_LINK + species
+                        + "\" target=\"taxonomy_window\">tol</a>" );
+                writer.write( "|" );
+                writer.write( "<a href=\"" + SurfacingConstants.WIKIPEDIA_LINK + species
+                        + "\" target=\"taxonomy_window\">wikipedia</a>" );
+                writer.write( "|" );
+                writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + species
+                        + "\" target=\"taxonomy_window\">gs</a>" );
+            }
+            writer.write( "]" );
+        }
+    }
+
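+    // Writes the binary character-state matrix to outfile_name as a Nexus file (taxa block plus
+    // binary characters block).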
+    private static void writeToNexus( final String outfile_name, final CharacterStateMatrix<BinaryStates> matrix ) {
+        if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
+            throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
+                    + "] to nexus" );
+        }
+        final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
+        try {
+            final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
+            w.write( NexusConstants.NEXUS );
+            w.write( ForesterUtil.LINE_SEPARATOR );
+            my_matrix.writeNexusTaxaBlock( w );
+            my_matrix.writeNexusBinaryChractersBlock( w );
+            w.flush();
+            w.close();
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+    }
+
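+    // As above, but also appends a Nexus trees block containing the given phylogeny.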
+    private static void writeToNexus( final String outfile_name,
+                                      final CharacterStateMatrix<BinaryStates> matrix,
+                                      final Phylogeny phylogeny ) {
+        if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
+            throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
+                    + "] to nexus" );
+        }
+        final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
+        final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>( 1 );
+        phylogenies.add( phylogeny );
+        try {
+            final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
+            w.write( NexusConstants.NEXUS );
+            w.write( ForesterUtil.LINE_SEPARATOR );
+            my_matrix.writeNexusTaxaBlock( w );
+            my_matrix.writeNexusBinaryChractersBlock( w );
+            PhylogenyWriter.writeNexusTreesBlock( w, phylogenies );
+            w.flush();
+            w.close();
+            ForesterUtil.programMessage( surfacing_old.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( surfacing_old.PRG_NAME, e.getMessage() );
+        }
+    }
+
+    private static void writeToNexus( final String outfile_name, final DomainParsimonyCalculator domain_parsimony ) {
+        writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAINS, domain_parsimony
+                .createMatrixOfDomainPresenceOrAbsence() );
+        writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS, domain_parsimony
+                .createMatrixOfBinaryDomainCombinationPresenceOrAbsence() );
+    }
+
+    private static void writeToNexus( final String outfile_name,
+                                      final DomainParsimonyCalculator domain_parsimony,
+                                      final Phylogeny phylogeny ) {
+        writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAINS, domain_parsimony
+                .createMatrixOfDomainPresenceOrAbsence(), phylogeny );
+        writeToNexus( outfile_name + surfacing_old.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS, domain_parsimony
+                .createMatrixOfBinaryDomainCombinationPresenceOrAbsence(), phylogeny );
+    }
+}
diff --git a/forester/java/src/org/forester/surfacing/TestSurfacing.java b/forester/java/src/org/forester/surfacing/TestSurfacing.java
new file mode 100644 (file)
index 0000000..678259e
--- /dev/null
@@ -0,0 +1,6277 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.surfacing;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates;
+import org.forester.io.parsers.HmmPfamOutputParser;
+import org.forester.io.parsers.nexus.PaupLogParser;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.surfacing.BinaryDomainCombination.DomainCombinationType;
+import org.forester.test.Test;
+import org.forester.util.ForesterUtil;
+
+@SuppressWarnings( "unused" )
+public class TestSurfacing {
+
+    private final static double ZERO_DIFF = 1.0E-9;
+
+    public static boolean isEqual( final double a, final double b ) {
+        return ( ( Math.abs( a - b ) ) < TestSurfacing.ZERO_DIFF );
+    }
+
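+    // Renders the binary states of all internal (non-external) nodes in the map as "name : state" lines.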
+    private static StringBuffer mapToStringBuffer( final Map<PhylogenyNode, CharacterStateMatrix.BinaryStates> map ) {
+        final StringBuffer sb = new StringBuffer();
+        for( final PhylogenyNode key : map.keySet() ) {
+            if ( !key.isExternal() ) {
+                sb.append( key.getName() );
+                sb.append( " : " );
+                sb.append( map.get( key ).toString() );
+                sb.append( ForesterUtil.getLineSeparator() );
+            }
+        }
+        return sb;
+    }
+
+    public static boolean test( final File test_dir ) {
+        System.out.print( "  Domain id: " );
+        if ( !TestSurfacing.testDomainId() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Protein id: " );
+        if ( !TestSurfacing.testProteinId() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Species: " );
+        if ( !TestSurfacing.testSpecies() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Basic domain: " );
+        if ( !TestSurfacing.testBasicDomain() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Basic protein: " );
+        if ( !TestSurfacing.testBasicProtein() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Combinable domains: " );
+        if ( !TestSurfacing.testCombinableDomains() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Directed combinable domains: " );
+        if ( !TestSurfacing.testDirectedCombinableDomains() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Genome wide specific combinable domains: " );
+        if ( !TestSurfacing.testGenomeWideCombinableDomains() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Domain architecture based genome similarity calculator: " );
+        if ( !TestSurfacing.testDomainArchitectureBasedGenomeSimilarityCalculator() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Hmmpfam output parser: " );
+        if ( !TestSurfacing.testHmmPfamOutputParser( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Hmmpfam output parser with filter: " );
+        if ( !TestSurfacing.testHmmPfamOutputParserWithFilter( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Combinations based pairwise similarity calculator: " );
+        if ( !TestSurfacing.testCombinationsBasedPairwiseSimilarityCalculator() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Copy number based pairwise similarity calculator: " );
+        if ( !TestSurfacing.testCopyNumberBasedPairwiseSimilarityCalculator() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Domain combination counting: " );
+        if ( !TestSurfacing.testDomainCombinationCounting( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Basic domain similarity calculator: " );
+        if ( !TestSurfacing.testBasicDomainSimilarityCalculator() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Basic domain similarity calculator not ignoring species specific domains: " );
+        if ( !TestSurfacing.testBasicDomainSimilarityCalculatorNotIgnoringSpeciesSpeficDomains() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Basic domain similarity calculator removal of singles: " );
+        if ( !TestSurfacing.testBasicDomainSimilarityCalculatorRemovalOfSingles() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Domain sorting: " );
+        if ( !TestSurfacing.testDomainSorting() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Overlap removal: " );
+        if ( !TestSurfacing.testOverlapRemoval() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Engulfing overlap removal: " );
+        if ( !TestSurfacing.testEngulfingOverlapRemoval() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Binary domain combination: " );
+        if ( !TestSurfacing.testBinaryDomainCombination() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Parsimony: " );
+        if ( !TestSurfacing.testParsimony() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Directedness: " );
+        if ( !TestSurfacing.testDirectedness() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Directedness and adjacency: " );
+        if ( !TestSurfacing.testDirectednessAndAdjacency() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Dollo parsimony on secodary features: " );
+        if ( !TestSurfacing.testParsimonyOnSecondaryFeatures() ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Paup log parser: " );
+        if ( !TestSurfacing.testPaupLogParser( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        System.out.print( "  Binary state matrix to gain loss matrix: " );
+        if ( !TestSurfacing.testBinaryStateMatrixToGainLossMatrix( test_dir ) ) {
+            System.out.println( "failed." );
+            return false;
+        }
+        System.out.println( "OK." );
+        return true;
+    }
+
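+    // Per the assertions below, BasicDomain equality and ordering are expected to depend only on the
+    // domain id: equals is case-sensitive ("a" != "A") while compareTo is case-insensitive.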
+    private static boolean testBasicDomain() {
+        try {
+            final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            if ( !pd.getDomainId().getId().equals( "id" ) ) {
+                return false;
+            }
+            if ( pd.getNumber() != 1 ) {
+                return false;
+            }
+            if ( pd.getTotalCount() != 4 ) {
+                return false;
+            }
+            if ( !pd.equals( new BasicDomain( "id", 22, 111, ( short ) 1, ( short ) 4, 0.2, -12 ) ) ) {
+                return false;
+            }
+            final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final BasicDomain a1_copy = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final BasicDomain a1_equal = new BasicDomain( "a", 524, 743994, ( short ) 1, ( short ) 300, 3.0005, 230 );
+            final BasicDomain a2 = new BasicDomain( "a", 1, 10, ( short ) 2, ( short ) 4, 0.1, -12 );
+            final BasicDomain a3 = new BasicDomain( "A", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 );
+            if ( !a1.equals( a1 ) ) {
+                return false;
+            }
+            if ( !a1.equals( a1_copy ) ) {
+                return false;
+            }
+            if ( !a1.equals( a1_equal ) ) {
+                return false;
+            }
+            if ( !a1.equals( a2 ) ) {
+                return false;
+            }
+            if ( a1.equals( a3 ) ) {
+                return false;
+            }
+            if ( a1.compareTo( a1 ) != 0 ) {
+                return false;
+            }
+            if ( a1.compareTo( a1_copy ) != 0 ) {
+                return false;
+            }
+            if ( a1.compareTo( a1_equal ) != 0 ) {
+                return false;
+            }
+            if ( a1.compareTo( a2 ) != 0 ) {
+                return false;
+            }
+            if ( a1.compareTo( a3 ) != 0 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testBasicDomainSimilarityCalculator() {
+        // mouse : ABCDE
+        // rabbit: A.C.EF
+        // ciona : A....FGX
+        // nemve : ABCDEFG
+        //
+        // domain A:
+        // m r c n
+        // m 2/(2+3) 0 4/(4+2)
+        // r 1/(1+4) 3/(3+3)
+        // c 2/(2+5)
+        // n
+        //
+        // mean = ( 2/5 + 0 + 2/3 + 1/5 + 1/2 + 2/7 ) / 6
+        // min = 0.0
+        // max = 2/3
+        // n = 6
+        //
+        //
+        // domain B:
+        // m n
+        // m 4/(4+2)
+        // n
+        //
+        // mean = 2/3
+        // min = 2/3
+        // max = 2/3
+        // sd = 0.0
+        // n = 1
+        //
+        //
+        // domain C:
+        // m r n
+        // m - 2/(2+3) 4/(4+2)
+        // r - - 3/(3+3)
+        // n - - -
+        //
+        // mean = (2/5 + 2/3 + 1/2)/3 ~ 0.5222
+        // min = 2/5
+        // max = 2/3
+        // sd (sample) ~ 0.1347
+        // n = 3
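+        // Numerically, the expectations asserted below work out to:
+        //   domain A: mean ~ 0.3421, sample SD ~ 0.2341, min = 0.0, max = 2/3
+        //   domain B: mean = min = max = 2/3, SD = 0.0
+        //   domain C: mean ~ 0.5222, sample SD ~ 0.1347, min = 2/5, max = 2/3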
+        try {
+            final Domain A = new BasicDomain( "A", 1, 2, ( short ) 1, ( short ) 1, 0.15, -12 );
+            final Domain B = new BasicDomain( "B", 1, 2, ( short ) 1, ( short ) 1, 0.2, -12 );
+            final Domain C = new BasicDomain( "C", 1, 2, ( short ) 1, ( short ) 1, 0.3, -12 );
+            final Domain D = new BasicDomain( "D", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 );
+            final Domain E = new BasicDomain( "E", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 );
+            final Domain F = new BasicDomain( "F", 1, 2, ( short ) 1, ( short ) 1, 0.01, -12 );
+            final Domain G = new BasicDomain( "G", 1, 2, ( short ) 1, ( short ) 1, 0.001, -12 );
+            final Domain X = new BasicDomain( "X", 1, 2, ( short ) 1, ( short ) 1, 0.0001, -12 );
+            if ( !TestSurfacing.isEqual( X.getPerSequenceScore(), -12 ) ) {
+                return false;
+            }
+            final Protein mouse_1 = new BasicProtein( "1", "mouse" );
+            final Protein rabbit_1 = new BasicProtein( "1", "rabbit" );
+            final Protein ciona_1 = new BasicProtein( "1", "ciona" );
+            final Protein nemve_1 = new BasicProtein( "1", "nemve" );
+            mouse_1.addProteinDomain( A );
+            mouse_1.addProteinDomain( B );
+            mouse_1.addProteinDomain( C );
+            mouse_1.addProteinDomain( D );
+            mouse_1.addProteinDomain( E );
+            rabbit_1.addProteinDomain( A );
+            rabbit_1.addProteinDomain( C );
+            rabbit_1.addProteinDomain( E );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( F );
+            ciona_1.addProteinDomain( G );
+            ciona_1.addProteinDomain( X );
+            nemve_1.addProteinDomain( A );
+            nemve_1.addProteinDomain( B );
+            nemve_1.addProteinDomain( C );
+            nemve_1.addProteinDomain( D );
+            nemve_1.addProteinDomain( E );
+            nemve_1.addProteinDomain( F );
+            nemve_1.addProteinDomain( G );
+            final List<Protein> protein_list_mouse = new ArrayList<Protein>();
+            final List<Protein> protein_list_rabbit = new ArrayList<Protein>();
+            final List<Protein> protein_list_ciona = new ArrayList<Protein>();
+            final List<Protein> protein_list_nemve = new ArrayList<Protein>();
+            protein_list_mouse.add( mouse_1 );
+            protein_list_rabbit.add( rabbit_1 );
+            protein_list_ciona.add( ciona_1 );
+            protein_list_nemve.add( nemve_1 );
+            final List<GenomeWideCombinableDomains> cdc_list = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                           true,
+                                                                           new BasicSpecies( "mouse" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                           true,
+                                                                           new BasicSpecies( "rabbit" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                           true,
+                                                                           new BasicSpecies( "ciona" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                           true,
+                                                                           new BasicSpecies( "nemve" ) ) );
+            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+                                                                                         false,
+                                                                                         false );
+            final SortedSet<DomainSimilarity> sims = calc
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list,
+                                            true,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it = sims.iterator();
+            final DomainSimilarity sa = sims_it.next();
+            if ( !sa.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa.getSpeciesData().size() != 4 ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "ciona" ) ) ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getMeanSimilarityScore(),
+                                         ( 2.0 / 5 + 0 + 2.0 / 3 + 1.0 / 5 + 1.0 / 2 + 2.0 / 7 ) / 6 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getStandardDeviationOfSimilarityScore(), ( 0.23410788192183737 ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getMinimalSimilarityScore(), ( 0.0 ) ) ) {
+                return false;
+            }
+            if ( sa.getN() != 6 ) {
+                return false;
+            }
+            if ( sa.getMaximalDifference() != 7 ) {
+                return false;
+            }
+            if ( sa.getMaximalDifferenceInCounts() != 3 ) {
+                return false;
+            }
+            final DomainSimilarity sb = sims_it.next();
+            if ( !sb.getDomainId().getId().equals( "B" ) ) {
+                return false;
+            }
+            if ( sb.getSpeciesData().size() != 2 ) {
+                return false;
+            }
+            if ( !sb.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) {
+                return false;
+            }
+            if ( !sb.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sb.getMeanSimilarityScore(), 2.0 / 3 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sb.getStandardDeviationOfSimilarityScore(), 0.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sb.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sb.getMinimalSimilarityScore(), ( 2.0 / 3 ) ) ) {
+                return false;
+            }
+            if ( sb.getN() != 1 ) {
+                return false;
+            }
+            if ( sb.getMaximalDifference() != 2 ) {
+                return false;
+            }
+            if ( sb.getMaximalDifferenceInCounts() != 2 ) {
+                return false;
+            }
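+            // Domain C occurs in mouse, rabbit and nemve, giving three pairwise scores: 2/5, 2/3 and 1/2.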
+            final DomainSimilarity sc = sims_it.next();
+            if ( !sc.getDomainId().getId().equals( "C" ) ) {
+                return false;
+            }
+            if ( sc.getSpeciesData().size() != 3 ) {
+                return false;
+            }
+            if ( !sc.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) {
+                return false;
+            }
+            if ( !sc.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
+            if ( !sc.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sc.getMeanSimilarityScore(), ( 2.0 / 5 + 2.0 / 3 + 1.0 / 2 ) / 3 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sc.getStandardDeviationOfSimilarityScore(), 0.13471506281091264 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sc.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sc.getMinimalSimilarityScore(), ( 2.0 / 5 ) ) ) {
+                return false;
+            }
+            if ( sc.getN() != 3 ) {
+                return false;
+            }
+            if ( sc.getMaximalDifference() != 3 ) {
+                return false;
+            }
+            if ( sc.getMaximalDifferenceInCounts() != 3 ) {
+                return false;
+            }
+            // mouse : ....ABCDE.....
+            // rabbit: ....A.C.EFFF..
+            // ciona : AAAAA......FGX
+            // nemve : ....ABCDEFG...
+            //
+            // domain A:
+            //        m         r         c         n
+            // m      -         2/(2+3)   0         4/(4+2)
+            // r                -         1/(1+5)   3/(3+3)
+            // c                          -         2/(2+6)
+            // n                                    -
+            //
+            // mean = ( 2/5 + 0 + 2/3 + 1/6 + 1/2 + 2/8 ) / 6
+            // min = 0.0
+            // max = 2/3
+            // n = 6
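+            // Second pass: the same four proteomes are rebuilt with the createInstance flag set to
+            // false and scored with calculateSimilarities( ..., false, true ); the expected mean and
+            // standard deviation below follow the sketch above (denominators 6 and 8 instead of the
+            // 5 and 7 used in the first pass).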
+            final List<GenomeWideCombinableDomains> cdc_list2 = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                            false,
+                                                                            new BasicSpecies( "mouse" ) ) );
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                            false,
+                                                                            new BasicSpecies( "rabbit" ) ) );
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                            false,
+                                                                            new BasicSpecies( "ciona" ) ) );
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                            false,
+                                                                            new BasicSpecies( "nemve" ) ) );
+            final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+                                                                                          false,
+                                                                                          false );
+            final SortedSet<DomainSimilarity> sims2 = calc2
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list2,
+                                            false,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it2 = sims2.iterator();
+            final DomainSimilarity sa2 = sims_it2.next();
+            if ( !sa2.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa2.getSpeciesData().size() != 4 ) {
+                return false;
+            }
+            if ( !sa2.getSpecies().contains( new BasicSpecies( "ciona" ) ) ) {
+                return false;
+            }
+            if ( !sa2.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) {
+                return false;
+            }
+            if ( !sa2.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) {
+                return false;
+            }
+            if ( !sa2.getSpeciesData().keySet().contains( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa2.getMeanSimilarityScore(),
+                                         ( 2.0 / 5 + 0 + 2.0 / 3 + 1.0 / 6 + 1.0 / 2 + 2.0 / 8 ) / 6 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa2.getStandardDeviationOfSimilarityScore(), ( 0.2404663678647683 ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa2.getMaximalSimilarityScore(), ( 2.0 / 3 ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa2.getMinimalSimilarityScore(), ( 0.0 ) ) ) {
+                return false;
+            }
+            if ( sa2.getN() != 6 ) {
+                return false;
+            }
+            if ( sa2.getMaximalDifference() != 8 ) {
+                return false;
+            }
+            if ( sa2.getMaximalDifferenceInCounts() != 3 ) {
+                return false;
+            }
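+            // Two further ciona proteins with repeated A, B, F, G and X domains are added so that
+            // per-species combinable-domain counts can be verified below.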
+            final Protein ciona_2 = new BasicProtein( "2", "ciona" );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( B );
+            ciona_2.addProteinDomain( B );
+            ciona_2.addProteinDomain( B );
+            ciona_2.addProteinDomain( F );
+            ciona_2.addProteinDomain( F );
+            ciona_2.addProteinDomain( F );
+            ciona_2.addProteinDomain( F );
+            ciona_2.addProteinDomain( G );
+            ciona_2.addProteinDomain( X );
+            final Protein ciona_3 = new BasicProtein( "3", "ciona" );
+            ciona_3.addProteinDomain( A );
+            ciona_3.addProteinDomain( A );
+            ciona_3.addProteinDomain( A );
+            ciona_3.addProteinDomain( A );
+            ciona_3.addProteinDomain( B );
+            ciona_3.addProteinDomain( B );
+            ciona_3.addProteinDomain( X );
+            ciona_3.addProteinDomain( X );
+            protein_list_ciona.add( ciona_2 );
+            protein_list_ciona.add( ciona_3 );
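+            // Third pass: the genomes are rebuilt with the createInstance flag set to true; judging
+            // from the assertions below, same-domain (A-A) combinations are then not counted, so
+            // ciona's combinable-domain map for A holds four entries (B, F, G, X).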
+            final List<GenomeWideCombinableDomains> cdc_list3 = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                            true,
+                                                                            new BasicSpecies( "mouse" ) ) );
+            cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                            true,
+                                                                            new BasicSpecies( "rabbit" ) ) );
+            cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                            true,
+                                                                            new BasicSpecies( "ciona" ) ) );
+            cdc_list3.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                            true,
+                                                                            new BasicSpecies( "nemve" ) ) );
+            final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+                                                                                          false,
+                                                                                          false );
+            final SortedSet<DomainSimilarity> sims3 = calc3
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list3,
+                                            false,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it3 = sims3.iterator();
+            final DomainSimilarity sa3 = sims_it3.next();
+            if ( !sa3.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            final SpeciesSpecificDomainSimilariyData ssdsd = sa3.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            if ( ssdsd.getCombinableDomainIdToCountsMap().size() != 4 ) {
+                return false;
+            }
+            if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "B" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "F" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "G" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "X" ) ) != 3 ) {
+                return false;
+            }
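+            // Fourth pass: the flag is false again, so the A-A combination is counted; ciona's map
+            // for A now has five entries, with A itself found in three ciona proteins.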
+            final List<GenomeWideCombinableDomains> cdc_list4 = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                            false,
+                                                                            new BasicSpecies( "mouse" ) ) );
+            cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                            false,
+                                                                            new BasicSpecies( "rabbit" ) ) );
+            cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                            false,
+                                                                            new BasicSpecies( "ciona" ) ) );
+            cdc_list4.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                            false,
+                                                                            new BasicSpecies( "nemve" ) ) );
+            final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+                                                                                          true,
+                                                                                          false );
+            final SortedSet<DomainSimilarity> sims4 = calc4
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list4,
+                                            false,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it4 = sims4.iterator();
+            final DomainSimilarity sa4 = sims_it4.next();
+            if ( !sa4.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            final SpeciesSpecificDomainSimilariyData ssdsd4 = sa4.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            if ( ssdsd4.getCombinableDomainIdToCountsMap().size() != 5 ) {
+                return false;
+            }
+            if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "A" ) ) != 3 ) {
+                return false;
+            }
+            if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "B" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "F" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "G" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd4.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "X" ) ) != 3 ) {
+                return false;
+            }
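+            // The same genome list is now scored with the domain-counts-based calculator; the
+            // expected mean apparently comes from scores of the form 1 - |count difference| / (count sum):
+            // ciona carries 12 copies of A versus 1 in each other genome, giving 1 - 11/13 for the
+            // three ciona pairs and 1 otherwise.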
+            final SortedSet<DomainSimilarity> sims4_d = calc4
+                    .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list4, false, true );
+            final Iterator<DomainSimilarity> sims_it4_d = sims4_d.iterator();
+            final DomainSimilarity sa4_d = sims_it4_d.next();
+            if ( !sa4_d.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa4_d.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).size() != 5 ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa4_d.getMeanSimilarityScore(), ( 1 + 1 - 11.0 / 13 + 1 - 11.0 / 13 + 1 + 1
+                    + 1 - 11.0 / 13 ) / 6.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa4_d.getMaximalSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa4_d.getMinimalSimilarityScore(), ( 1 - 11.0 / 13 ) ) ) {
+                return false;
+            }
+            if ( sa4_d.getN() != 6 ) {
+                return false;
+            }
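+            // Protein-counts-based scoring: A occurs in three ciona proteins versus one protein in
+            // each other genome, so the three ciona pairs apparently score 1 - 2/4 and the rest 1.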
+            final SortedSet<DomainSimilarity> sims4_p = calc4
+                    .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list4,
+                                            false,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it4_p = sims4_p.iterator();
+            final DomainSimilarity sa4_p = sims_it4_p.next();
+            if ( !sa4_p.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).size() != 5 ) {
+                return false;
+            }
+            if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( new DomainId( "A" ) ) ) {
+                return false;
+            }
+            if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( new DomainId( "B" ) ) ) {
+                return false;
+            }
+            if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( new DomainId( "F" ) ) ) {
+                return false;
+            }
+            if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( new DomainId( "G" ) ) ) {
+                return false;
+            }
+            if ( !sa4_p.getCombinableDomainIds( new BasicSpecies( "ciona" ) ).contains( new DomainId( "X" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa4_p.getMeanSimilarityScore(),
+                                         ( 1 + 1 - 2.0 / 4 + 1 - 2.0 / 4 + 1 + 1 + 1 - 2.0 / 4 ) / 6.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa4_p.getMaximalSimilarityScore(), 1 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa4_p.getMinimalSimilarityScore(), ( 1 - 2.0 / 4 ) ) ) {
+                return false;
+            }
+            if ( sa4_p.getN() != 6 ) {
+                return false;
+            }
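+            // cdc_list5 repeats the setup with the createInstance flag set to true: A-A is excluded
+            // again (four combinable domains for A in ciona) and, for the domain-counts-based scores,
+            // the maximal difference and the maximal difference in counts are both 11.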
+            final List<GenomeWideCombinableDomains> cdc_list5 = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                            true,
+                                                                            new BasicSpecies( "mouse" ) ) );
+            cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                            true,
+                                                                            new BasicSpecies( "rabbit" ) ) );
+            cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                            true,
+                                                                            new BasicSpecies( "ciona" ) ) );
+            cdc_list5.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                            true,
+                                                                            new BasicSpecies( "nemve" ) ) );
+            final SortedSet<DomainSimilarity> sims5_d = calc4
+                    .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list5, false, true );
+            final Iterator<DomainSimilarity> sims_it5_d = sims5_d.iterator();
+            final DomainSimilarity sa5_d = sims_it5_d.next();
+            if ( sa5_d.getSpecies().size() != 4 ) {
+                return false;
+            }
+            if ( !sa5_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
+            final SpeciesSpecificDomainSimilariyData ssdsd5 = sa5_d.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            if ( ssdsd5.getCombinableDomainIdToCountsMap().size() != 4 ) {
+                return false;
+            }
+            if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "B" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "F" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "G" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd5.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "X" ) ) != 3 ) {
+                return false;
+            }
+            if ( !sa5_d.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            final Species ciona = new BasicSpecies( "ciona" );
+            if ( sa5_d.getCombinableDomainIds( ciona ).size() != 4 ) {
+                return false;
+            }
+            if ( sa5_d.getCombinableDomainIds( ciona ).contains( new DomainId( "A" ) ) ) {
+                return false;
+            }
+            if ( !sa5_d.getCombinableDomainIds( ciona ).contains( new DomainId( "B" ) ) ) {
+                return false;
+            }
+            if ( !sa5_d.getCombinableDomainIds( ciona ).contains( new DomainId( "F" ) ) ) {
+                return false;
+            }
+            if ( !sa5_d.getCombinableDomainIds( ciona ).contains( new DomainId( "G" ) ) ) {
+                return false;
+            }
+            if ( !sa5_d.getCombinableDomainIds( ciona ).contains( new DomainId( "X" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa5_d.getMeanSimilarityScore(), ( 1 + 1 - 11.0 / 13 + 1 - 11.0 / 13 + 1 + 1
+                    + 1 - 11.0 / 13 ) / 6.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa5_d.getMaximalSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa5_d.getMinimalSimilarityScore(), ( 1 - 11.0 / 13 ) ) ) {
+                return false;
+            }
+            if ( sa5_d.getN() != 6 ) {
+                return false;
+            }
+            if ( sa5_d.getMaximalDifference() != sa5_d.getMaximalDifferenceInCounts() ) {
+                return false;
+            }
+            if ( sa5_d.getMaximalDifference() != 11 ) {
+                return false;
+            }
+            if ( sa5_d.getMaximalDifferenceInCounts() != 11 ) {
+                return false;
+            }
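+            // Protein-counts-based scoring of cdc_list5: the maximal difference drops to 2
+            // (three ciona proteins with A versus one elsewhere).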
+            final SortedSet<DomainSimilarity> sims5_p = calc4
+                    .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list5,
+                                            false,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it5_p = sims5_p.iterator();
+            final DomainSimilarity sa5_p = sims_it5_p.next();
+            if ( !sa5_p.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa5_p.getCombinableDomainIds( ciona ).size() != 4 ) {
+                return false;
+            }
+            if ( sa5_p.getCombinableDomainIds( ciona ).contains( new DomainId( "A" ) ) ) {
+                return false;
+            }
+            if ( !sa5_p.getCombinableDomainIds( ciona ).contains( new DomainId( "B" ) ) ) {
+                return false;
+            }
+            if ( !sa5_p.getCombinableDomainIds( ciona ).contains( new DomainId( "F" ) ) ) {
+                return false;
+            }
+            if ( !sa5_p.getCombinableDomainIds( ciona ).contains( new DomainId( "G" ) ) ) {
+                return false;
+            }
+            if ( !sa5_p.getCombinableDomainIds( ciona ).contains( new DomainId( "X" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa5_p.getMeanSimilarityScore(),
+                                         ( 1 + 1 - 2.0 / 4 + 1 - 2.0 / 4 + 1 + 1 + 1 - 2.0 / 4 ) / 6.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa5_p.getMaximalSimilarityScore(), 1 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa5_p.getMinimalSimilarityScore(), ( 1 - 2.0 / 4 ) ) ) {
+                return false;
+            }
+            if ( sa5_p.getN() != 6 ) {
+                return false;
+            }
+            if ( sa5_p.getMaximalDifference() != sa5_p.getMaximalDifferenceInCounts() ) {
+                return false;
+            }
+            if ( sa5_p.getMaximalDifference() != 2 ) {
+                return false;
+            }
+            if ( sa5_p.getMaximalDifferenceInCounts() != 2 ) {
+                return false;
+            }
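+            // cdc_list6 rebuilds the genomes with the flag set to false once more, so A-A is counted
+            // again: five combinable domains for A in ciona, including A itself.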
+            final List<GenomeWideCombinableDomains> cdc_list6 = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                            false,
+                                                                            new BasicSpecies( "mouse" ) ) );
+            cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                            false,
+                                                                            new BasicSpecies( "rabbit" ) ) );
+            cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                            false,
+                                                                            new BasicSpecies( "ciona" ) ) );
+            cdc_list6.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                            false,
+                                                                            new BasicSpecies( "nemve" ) ) );
+            final SortedSet<DomainSimilarity> sims6_d = calc4
+                    .calculateSimilarities( new DomainCountsBasedPairwiseSimilarityCalculator(), cdc_list6, false, true );
+            final Iterator<DomainSimilarity> sims_it6_d = sims6_d.iterator();
+            final DomainSimilarity sa6_d = sims_it6_d.next();
+            if ( sa6_d.getSpecies().size() != 4 ) {
+                return false;
+            }
+            if ( !sa6_d.getSpecies().last().equals( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
+            final SpeciesSpecificDomainSimilariyData ssdsd6 = sa6_d.getSpeciesData().get( new BasicSpecies( "ciona" ) );
+            if ( ssdsd6.getCombinableDomainIdToCountsMap().size() != 5 ) {
+                return false;
+            }
+            if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "B" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "F" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "G" ) ) != 2 ) {
+                return false;
+            }
+            if ( ssdsd6.getNumberOfProteinsExhibitingCombinationWith( new DomainId( "X" ) ) != 3 ) {
+                return false;
+            }
+            if ( !sa6_d.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            final Species ciona6 = new BasicSpecies( "ciona" );
+            if ( sa6_d.getCombinableDomainIds( ciona6 ).size() != 5 ) {
+                return false;
+            }
+            if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( new DomainId( "A" ) ) ) {
+                return false;
+            }
+            if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( new DomainId( "B" ) ) ) {
+                return false;
+            }
+            if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( new DomainId( "F" ) ) ) {
+                return false;
+            }
+            if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( new DomainId( "G" ) ) ) {
+                return false;
+            }
+            if ( !sa6_d.getCombinableDomainIds( ciona6 ).contains( new DomainId( "X" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa6_d.getMeanSimilarityScore(), ( 1 + 1 - 11.0 / 13 + 1 - 11.0 / 13 + 1 + 1
+                    + 1 - 11.0 / 13 ) / 6.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa6_d.getMaximalSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa6_d.getMinimalSimilarityScore(), ( 1 - 11.0 / 13 ) ) ) {
+                return false;
+            }
+            if ( sa6_d.getN() != 6 ) {
+                return false;
+            }
+            if ( sa6_d.getMaximalDifference() != sa6_d.getMaximalDifferenceInCounts() ) {
+                return false;
+            }
+            if ( sa6_d.getMaximalDifference() != 11 ) {
+                return false;
+            }
+            if ( sa6_d.getMaximalDifferenceInCounts() != 11 ) {
+                return false;
+            }
+            final SortedSet<DomainSimilarity> sims6_p = calc4
+                    .calculateSimilarities( new ProteinCountsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list6,
+                                            false,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it6_p = sims6_p.iterator();
+            final DomainSimilarity sa6_p = sims_it6_p.next();
+            if ( !sa6_p.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa6_p.getCombinableDomainIds( ciona ).size() != 5 ) {
+                return false;
+            }
+            if ( !sa6_p.getCombinableDomainIds( ciona ).contains( new DomainId( "A" ) ) ) {
+                return false;
+            }
+            if ( !sa6_p.getCombinableDomainIds( ciona ).contains( new DomainId( "B" ) ) ) {
+                return false;
+            }
+            if ( !sa6_p.getCombinableDomainIds( ciona ).contains( new DomainId( "F" ) ) ) {
+                return false;
+            }
+            if ( !sa6_p.getCombinableDomainIds( ciona ).contains( new DomainId( "G" ) ) ) {
+                return false;
+            }
+            if ( !sa6_p.getCombinableDomainIds( ciona ).contains( new DomainId( "X" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa6_p.getMeanSimilarityScore(),
+                                         ( 1 + 1 - 2.0 / 4 + 1 - 2.0 / 4 + 1 + 1 + 1 - 2.0 / 4 ) / 6.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa6_p.getMaximalSimilarityScore(), 1 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa6_p.getMinimalSimilarityScore(), ( 1 - 2.0 / 4 ) ) ) {
+                return false;
+            }
+            if ( sa6_p.getN() != 6 ) {
+                return false;
+            }
+            if ( sa6_p.getMaximalDifference() != sa6_p.getMaximalDifferenceInCounts() ) {
+                return false;
+            }
+            if ( sa6_p.getMaximalDifference() != 2 ) {
+                return false;
+            }
+            if ( sa6_p.getMaximalDifferenceInCounts() != 2 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
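+    // Judging from the assertions, the last argument of calculateSimilarities controls whether
+    // domains confined to a single species are dropped: with false, the mouse-only domain A is
+    // kept (mean score 1.0, N == 0); rerunning with true removes it and the first hit becomes D.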
+    private static boolean testBasicDomainSimilarityCalculatorNotIgnoringSpeciesSpeficDomains() {
+        try {
+            final Domain A = new BasicDomain( "A", 1, 2, ( short ) 1, ( short ) 1, 0.15, -12 );
+            final Domain B = new BasicDomain( "B", 1, 2, ( short ) 1, ( short ) 1, 0.2, -12 );
+            final Domain D = new BasicDomain( "D", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 );
+            final Domain E = new BasicDomain( "E", 1, 2, ( short ) 1, ( short ) 1, 0.5, -12 );
+            final Domain F = new BasicDomain( "F", 1, 2, ( short ) 1, ( short ) 1, 0.01, -12 );
+            final Domain G = new BasicDomain( "G", 1, 2, ( short ) 1, ( short ) 1, 0.001, -12 );
+            final Domain X = new BasicDomain( "X", 1, 2, ( short ) 1, ( short ) 1, 0.0001, -12 );
+            if ( !TestSurfacing.isEqual( X.getPerSequenceScore(), -12 ) ) {
+                return false;
+            }
+            final Protein mouse_1 = new BasicProtein( "1", "mouse" );
+            final Protein rabbit_1 = new BasicProtein( "1", "rabbit" );
+            final Protein ciona_1 = new BasicProtein( "1", "ciona" );
+            final Protein nemve_1 = new BasicProtein( "1", "nemve" );
+            mouse_1.addProteinDomain( A );
+            mouse_1.addProteinDomain( D );
+            mouse_1.addProteinDomain( E );
+            rabbit_1.addProteinDomain( B );
+            rabbit_1.addProteinDomain( E );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            rabbit_1.addProteinDomain( F );
+            ciona_1.addProteinDomain( F );
+            ciona_1.addProteinDomain( G );
+            ciona_1.addProteinDomain( X );
+            nemve_1.addProteinDomain( D );
+            nemve_1.addProteinDomain( E );
+            nemve_1.addProteinDomain( F );
+            nemve_1.addProteinDomain( G );
+            final List<Protein> protein_list_mouse = new ArrayList<Protein>();
+            final List<Protein> protein_list_rabbit = new ArrayList<Protein>();
+            final List<Protein> protein_list_ciona = new ArrayList<Protein>();
+            final List<Protein> protein_list_nemve = new ArrayList<Protein>();
+            protein_list_mouse.add( mouse_1 );
+            protein_list_rabbit.add( rabbit_1 );
+            protein_list_ciona.add( ciona_1 );
+            protein_list_nemve.add( nemve_1 );
+            final List<GenomeWideCombinableDomains> cdc_list = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                           true,
+                                                                           new BasicSpecies( "mouse" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                           true,
+                                                                           new BasicSpecies( "rabbit" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                           true,
+                                                                           new BasicSpecies( "ciona" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                           true,
+                                                                           new BasicSpecies( "nemve" ) ) );
+            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+                                                                                         false,
+                                                                                         false );
+            final SortedSet<DomainSimilarity> sims = calc
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list,
+                                            true,
+                                            false );
+            final Iterator<DomainSimilarity> sims_it = sims.iterator();
+            final DomainSimilarity sa = sims_it.next();
+            if ( !sa.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa.getSpeciesData().size() != 1 ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getMeanSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getStandardDeviationOfSimilarityScore(), 0.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getMaximalSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( !TestSurfacing.isEqual( sa.getMinimalSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( sa.getN() != 0 ) {
+                return false;
+            }
+            if ( sa.getMaximalDifference() != 0 ) {
+                return false;
+            }
+            if ( sa.getMaximalDifferenceInCounts() != 0 ) {
+                return false;
+            }
+            final DomainSimilarity sb = sims_it.next();
+            if ( !sb.getDomainId().getId().equals( "B" ) ) {
+                return false;
+            }
+            if ( sb.getSpeciesData().size() != 1 ) {
+                return false;
+            }
+            if ( !sb.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
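+            // Rerunning with the last argument set to true drops the species-specific domains, so
+            // the first similarity returned is now for D, shared by mouse and nemve.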
+            final SortedSet<DomainSimilarity> sims2 = calc
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list,
+                                            true,
+                                            true );
+            final Iterator<DomainSimilarity> sims_it2 = sims2.iterator();
+            final DomainSimilarity sa2 = sims_it2.next();
+            if ( !sa2.getDomainId().getId().equals( "D" ) ) {
+                return false;
+            }
+            if ( sa2.getSpeciesData().size() != 2 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
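+    // Judging from the assertions, the third argument of calculateSimilarities removes "single"
+    // domains (domains never found in combination with another domain): with only stand-alone A
+    // domains the result set is empty when that flag is true, but contains A when it is false.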
+    private static boolean testBasicDomainSimilarityCalculatorRemovalOfSingles() {
+        try {
+            final Domain A = new BasicDomain( "A", 1, 2, ( short ) 1, ( short ) 1, 0.15, -12 );
+            final Domain B = new BasicDomain( "B", 1, 2, ( short ) 1, ( short ) 1, 0.2, -12 );
+            final Protein mouse_1 = new BasicProtein( "1", "mouse" );
+            final Protein rabbit_1 = new BasicProtein( "1", "rabbit" );
+            final Protein ciona_1 = new BasicProtein( "1", "ciona" );
+            final Protein nemve_1 = new BasicProtein( "1", "nemve" );
+            mouse_1.addProteinDomain( A );
+            rabbit_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            ciona_1.addProteinDomain( A );
+            nemve_1.addProteinDomain( A );
+            final List<Protein> protein_list_mouse = new ArrayList<Protein>();
+            final List<Protein> protein_list_rabbit = new ArrayList<Protein>();
+            final List<Protein> protein_list_ciona = new ArrayList<Protein>();
+            final List<Protein> protein_list_nemve = new ArrayList<Protein>();
+            protein_list_mouse.add( mouse_1 );
+            protein_list_rabbit.add( rabbit_1 );
+            protein_list_ciona.add( ciona_1 );
+            protein_list_nemve.add( nemve_1 );
+            final List<GenomeWideCombinableDomains> cdc_list = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse,
+                                                                           true,
+                                                                           new BasicSpecies( "mouse" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit,
+                                                                           true,
+                                                                           new BasicSpecies( "rabbit" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona,
+                                                                           true,
+                                                                           new BasicSpecies( "ciona" ) ) );
+            cdc_list.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve,
+                                                                           true,
+                                                                           new BasicSpecies( "nemve" ) ) );
+            final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
+                                                                                         false,
+                                                                                         false );
+            final SortedSet<DomainSimilarity> sims = calc
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list,
+                                            false,
+                                            true );
+            if ( sims.size() != 1 ) {
+                return false;
+            }
+            final Iterator<DomainSimilarity> sims_it = sims.iterator();
+            final DomainSimilarity sa = sims_it.next();
+            if ( !sa.getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sa.getSpeciesData().size() != 4 ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "ciona" ) ) ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "mouse" ) ) ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "nemve" ) ) ) {
+                return false;
+            }
+            if ( !sa.getSpecies().contains( new BasicSpecies( "rabbit" ) ) ) {
+                return false;
+            }
+            final SortedSet<DomainSimilarity> sims_ns = calc
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list,
+                                            true,
+                                            true );
+            if ( sims_ns.size() != 0 ) {
+                return false;
+            }
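+            // A second data set in which one ciona protein also carries a B domain: A now forms a
+            // combination and survives the filtering, so exactly one similarity is returned.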
+            final Protein mouse_2 = new BasicProtein( "1", "mouse" );
+            final Protein rabbit_2 = new BasicProtein( "1", "rabbit" );
+            final Protein ciona_2 = new BasicProtein( "1", "ciona" );
+            final Protein nemve_2 = new BasicProtein( "1", "nemve" );
+            mouse_2.addProteinDomain( A );
+            rabbit_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( B );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( A );
+            ciona_2.addProteinDomain( A );
+            nemve_2.addProteinDomain( A );
+            final List<Protein> protein_list_mouse2 = new ArrayList<Protein>();
+            final List<Protein> protein_list_rabbit2 = new ArrayList<Protein>();
+            final List<Protein> protein_list_ciona2 = new ArrayList<Protein>();
+            final List<Protein> protein_list_nemve2 = new ArrayList<Protein>();
+            protein_list_mouse2.add( mouse_2 );
+            protein_list_rabbit2.add( rabbit_2 );
+            protein_list_ciona2.add( ciona_2 );
+            protein_list_nemve2.add( nemve_2 );
+            final List<GenomeWideCombinableDomains> cdc_list2 = new ArrayList<GenomeWideCombinableDomains>();
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_mouse2,
+                                                                            true,
+                                                                            new BasicSpecies( "mouse" ) ) );
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_rabbit2,
+                                                                            true,
+                                                                            new BasicSpecies( "rabbit" ) ) );
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_ciona2,
+                                                                            true,
+                                                                            new BasicSpecies( "ciona" ) ) );
+            cdc_list2.add( BasicGenomeWideCombinableDomains.createInstance( protein_list_nemve2,
+                                                                            true,
+                                                                            new BasicSpecies( "nemve" ) ) );
+            final SortedSet<DomainSimilarity> sims2 = calc
+                    .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
+                                            cdc_list2,
+                                            true,
+                                            true );
+            if ( sims2.size() != 1 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
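+    // Builds a protein whose domains sort to the order A A B A B A B B C A C D (see the position
+    // comment below) and exercises Protein.contains(); judging from the assertions, the second
+    // argument requires the given domain ids to occur in that order when true.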
+    private static boolean testBasicProtein() {
+        try {
+            // A0  A10  B15  A20  B25  A30  B35  B40  C50  A60  C70  D80
+            final Domain A0 = new BasicDomain( "A", 0, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain A10 = new BasicDomain( "A", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain B15 = new BasicDomain( "B", 11, 16, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain A20 = new BasicDomain( "A", 20, 100, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain B25 = new BasicDomain( "B", 25, 26, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain A30 = new BasicDomain( "A", 30, 31, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain B35 = new BasicDomain( "B", 31, 40, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain B40 = new BasicDomain( "B", 40, 600, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain C50 = new BasicDomain( "C", 50, 59, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain A60 = new BasicDomain( "A", 60, 395, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain C70 = new BasicDomain( "C", 70, 71, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain D80 = new BasicDomain( "D", 80, 81, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final BasicProtein p = new BasicProtein( "p", "owl" );
+            p.addProteinDomain( B15 );
+            p.addProteinDomain( C50 );
+            p.addProteinDomain( A60 );
+            p.addProteinDomain( A30 );
+            p.addProteinDomain( C70 );
+            p.addProteinDomain( B35 );
+            p.addProteinDomain( B40 );
+            p.addProteinDomain( A0 );
+            p.addProteinDomain( A10 );
+            p.addProteinDomain( A20 );
+            p.addProteinDomain( B25 );
+            p.addProteinDomain( D80 );
+            List<DomainId> domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids.add( new DomainId( "X" ) );
+            if ( p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( !p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            domains_ids.add( new DomainId( "X" ) );
+            if ( p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "X" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+            domains_ids = new ArrayList<DomainId>();
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "B" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "A" ) );
+            domains_ids.add( new DomainId( "C" ) );
+            domains_ids.add( new DomainId( "D" ) );
+            if ( !p.contains( domains_ids, false ) ) {
+                return false;
+            }
+            if ( p.contains( domains_ids, true ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
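+    // Checks BasicBinaryDomainCombination: the two domain ids are treated as unordered
+    // (new ( "b", "a" ) is expected to equal new ( "a", "b" )), createInstance( " z-z=a-aa " )
+    // is expected to render as "a-aa=z-z", equality is case-sensitive ("a"/"b" does not equal
+    // "A"/"B"), and adding s0-s8 to a TreeSet is expected to yield six distinct entries.
+    // DirectedBinaryDomainCombination, in contrast, keeps the given order, so "a"/"b" and
+    // "b"/"a" are not equal.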
+    private static boolean testBinaryDomainCombination() {
+        try {
+            final BasicBinaryDomainCombination s0 = new BasicBinaryDomainCombination( "a", "a" );
+            final BasicBinaryDomainCombination s1 = new BasicBinaryDomainCombination( "b", "a" );
+            final BasicBinaryDomainCombination s2 = new BasicBinaryDomainCombination( "a", "b" );
+            final BasicBinaryDomainCombination s3 = new BasicBinaryDomainCombination( "B", "A" );
+            final BasicBinaryDomainCombination s4 = new BasicBinaryDomainCombination( "A", "B" );
+            final BasicBinaryDomainCombination s5 = new BasicBinaryDomainCombination( "c", "a" );
+            final BasicBinaryDomainCombination s6 = new BasicBinaryDomainCombination( "b", "c" );
+            final BasicBinaryDomainCombination s7 = new BasicBinaryDomainCombination( "d", "a" );
+            final BasicBinaryDomainCombination s8 = new BasicBinaryDomainCombination( "b", "d" );
+            final BinaryDomainCombination s9 = BasicBinaryDomainCombination.createInstance( " z-z=a-aa " );
+            if ( !s9.toString().equals( "a-aa=z-z" ) ) {
+                return false;
+            }
+            if ( !s0.equals( s0 ) ) {
+                return false;
+            }
+            if ( s0.equals( s1 ) ) {
+                return false;
+            }
+            if ( s1.equals( s0 ) ) {
+                return false;
+            }
+            if ( !s1.equals( s2 ) ) {
+                return false;
+            }
+            if ( !s2.equals( s1 ) ) {
+                return false;
+            }
+            if ( s2.equals( s3 ) ) {
+                return false;
+            }
+            if ( s3.equals( s2 ) ) {
+                return false;
+            }
+            if ( s2.equals( s4 ) ) {
+                return false;
+            }
+            final SortedSet<BasicBinaryDomainCombination> sorted = new TreeSet<BasicBinaryDomainCombination>();
+            sorted.add( s0 );
+            sorted.add( s1 );
+            sorted.add( s2 );
+            sorted.add( s3 );
+            sorted.add( s3 );
+            sorted.add( s3 );
+            sorted.add( s4 );
+            sorted.add( s5 );
+            sorted.add( s6 );
+            sorted.add( s7 );
+            sorted.add( s7 );
+            sorted.add( s8 );
+            if ( sorted.size() != 6 ) {
+                return false;
+            }
+            final DirectedBinaryDomainCombination aa = new DirectedBinaryDomainCombination( "a", "a" );
+            final DirectedBinaryDomainCombination ba = new DirectedBinaryDomainCombination( "b", "a" );
+            final DirectedBinaryDomainCombination ab = new DirectedBinaryDomainCombination( "a", "b" );
+            final DirectedBinaryDomainCombination bb = new DirectedBinaryDomainCombination( "b", "b" );
+            if ( !aa.equals( aa ) ) {
+                return false;
+            }
+            if ( aa.equals( bb ) ) {
+                return false;
+            }
+            if ( ab.equals( ba ) ) {
+                return false;
+            }
+            if ( ba.equals( ab ) ) {
+                return false;
+            }
+            if ( !ab.equals( ab ) ) {
+                return false;
+            }
+            if ( ab.equals( aa ) ) {
+                return false;
+            }
+            if ( ab.equals( bb ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
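+    // Checks DomainParsimonyCalculator.executeOnGivenBinaryStatesMatrix(): a 7 x 6
+    // presence/absence matrix over the tips A-D and internal nodes 1-3 of the tree
+    // "(((A,B)1,C)2,D)3" is expected to give a total cost of 13 (5 gains, 8 losses,
+    // 29 unchanged), plus the spot-checked per-node gain/loss states for characters 1 and 4.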
+    private static boolean testBinaryStateMatrixToGainLossMatrix( final File test_dir ) {
+        final BinaryStates I = BinaryStates.PRESENT;
+        final BinaryStates O = BinaryStates.ABSENT;
+        try {
+            final CharacterStateMatrix<BinaryStates> binary_states_matrix_0 = new BasicCharacterStateMatrix<BinaryStates>( 7,
+                                                                                                                           6 );
+            binary_states_matrix_0.setIdentifier( 0, "A" );
+            binary_states_matrix_0.setIdentifier( 1, "B" );
+            binary_states_matrix_0.setIdentifier( 2, "C" );
+            binary_states_matrix_0.setIdentifier( 3, "D" );
+            binary_states_matrix_0.setIdentifier( 4, "1" );
+            binary_states_matrix_0.setIdentifier( 5, "2" );
+            binary_states_matrix_0.setIdentifier( 6, "3" );
+            binary_states_matrix_0.setState( 0, 0, O );
+            binary_states_matrix_0.setState( 1, 0, O );
+            binary_states_matrix_0.setState( 2, 0, O );
+            binary_states_matrix_0.setState( 3, 0, O );
+            binary_states_matrix_0.setState( 4, 0, O );
+            binary_states_matrix_0.setState( 5, 0, O );
+            binary_states_matrix_0.setState( 6, 0, O );
+            binary_states_matrix_0.setState( 0, 1, I );
+            binary_states_matrix_0.setState( 1, 1, O );
+            binary_states_matrix_0.setState( 2, 1, O );
+            binary_states_matrix_0.setState( 3, 1, O );
+            binary_states_matrix_0.setState( 4, 1, O );
+            binary_states_matrix_0.setState( 5, 1, O );
+            binary_states_matrix_0.setState( 6, 1, O );
+            binary_states_matrix_0.setState( 0, 2, O );
+            binary_states_matrix_0.setState( 1, 2, O );
+            binary_states_matrix_0.setState( 2, 2, O );
+            binary_states_matrix_0.setState( 3, 2, O );
+            binary_states_matrix_0.setState( 4, 2, I );
+            binary_states_matrix_0.setState( 5, 2, O );
+            binary_states_matrix_0.setState( 6, 2, O );
+            binary_states_matrix_0.setState( 0, 3, I );
+            binary_states_matrix_0.setState( 1, 3, O );
+            binary_states_matrix_0.setState( 2, 3, O );
+            binary_states_matrix_0.setState( 3, 3, O );
+            binary_states_matrix_0.setState( 4, 3, I );
+            binary_states_matrix_0.setState( 5, 3, O );
+            binary_states_matrix_0.setState( 6, 3, I );
+            binary_states_matrix_0.setState( 0, 4, I );
+            binary_states_matrix_0.setState( 1, 4, O );
+            binary_states_matrix_0.setState( 2, 4, I );
+            binary_states_matrix_0.setState( 3, 4, O );
+            binary_states_matrix_0.setState( 4, 4, I );
+            binary_states_matrix_0.setState( 5, 4, O );
+            binary_states_matrix_0.setState( 6, 4, I );
+            binary_states_matrix_0.setState( 0, 5, I );
+            binary_states_matrix_0.setState( 1, 5, I );
+            binary_states_matrix_0.setState( 2, 5, I );
+            binary_states_matrix_0.setState( 3, 5, I );
+            binary_states_matrix_0.setState( 4, 5, I );
+            binary_states_matrix_0.setState( 5, 5, I );
+            binary_states_matrix_0.setState( 6, 5, I );
+            final String[] character_labels_0 = new String[ 6 ];
+            character_labels_0[ 0 ] = "first";
+            character_labels_0[ 1 ] = "second";
+            character_labels_0[ 2 ] = "third";
+            character_labels_0[ 3 ] = "fourth";
+            character_labels_0[ 4 ] = "fifth";
+            character_labels_0[ 5 ] = "sixth";
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny phylogeny_0 = factory.create( "(((A,B)1,C)2,D)3", new NHXParser() )[ 0 ];
+            final DomainParsimonyCalculator dom_pars = DomainParsimonyCalculator.createInstance( phylogeny_0 );
+            dom_pars.executeOnGivenBinaryStatesMatrix( binary_states_matrix_0, character_labels_0 );
+            final CharacterStateMatrix<GainLossStates> gl_matrix_0 = dom_pars.getGainLossMatrix();
+            // final StringWriter sw = new StringWriter();
+            //  gl_matrix_0.toWriter( sw );
+            // System.out.println( sw.toString() );
+            if ( dom_pars.getCost() != 13 ) {
+                return false;
+            }
+            if ( dom_pars.getTotalGains() != 5 ) {
+                return false;
+            }
+            if ( dom_pars.getTotalLosses() != 8 ) {
+                return false;
+            }
+            if ( dom_pars.getTotalUnchanged() != 29 ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "A", 1 ) != GainLossStates.GAIN ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "A", 4 ) != GainLossStates.UNCHANGED_PRESENT ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "B", 4 ) != GainLossStates.LOSS ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "C", 4 ) != GainLossStates.GAIN ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "D", 4 ) != GainLossStates.LOSS ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "1", 4 ) != GainLossStates.GAIN ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "2", 4 ) != GainLossStates.LOSS ) {
+                return false;
+            }
+            if ( gl_matrix_0.getState( "3", 4 ) != GainLossStates.UNCHANGED_PRESENT ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
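+    // Checks BasicCombinableDomains: repeatedly added partner domains are counted via
+    // getNumberOfProteinsExhibitingCombination() rather than duplicated, getAllDomains()
+    // includes the key domain itself, and toBinaryDomainCombinations() is expected to yield
+    // order-insensitive combinations (key0-a, key0-b, key0-c), including a key1-key1
+    // self-combination when the key domain is added as its own partner.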
+    private static boolean testCombinableDomains() {
+        try {
+            final Domain key0 = new BasicDomain( "key0", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains cd0 = new BasicCombinableDomains( key0.getDomainId(), new BasicSpecies( "eel" ) );
+            cd0.addCombinableDomain( a.getDomainId() );
+            cd0.addCombinableDomain( b.getDomainId() );
+            cd0.addCombinableDomain( b.getDomainId() );
+            cd0.addCombinableDomain( c.getDomainId() );
+            cd0.addCombinableDomain( c.getDomainId() );
+            cd0.addCombinableDomain( c.getDomainId() );
+            if ( cd0.getNumberOfCombinableDomains() != 3 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( a.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( b.getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( c.getDomainId() ) != 3 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( key0.getDomainId() ) != 0 ) {
+                return false;
+            }
+            if ( cd0.getAllDomains().size() != 4 ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( c.getDomainId() ) ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( key0.getDomainId() ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().size() != 3 ) {
+                return false;
+            }
+            final BasicBinaryDomainCombination s0 = new BasicBinaryDomainCombination( "key0", "a" );
+            final BasicBinaryDomainCombination s1 = new BasicBinaryDomainCombination( "b", "key0" );
+            final BasicBinaryDomainCombination s2 = new BasicBinaryDomainCombination( "key0", "c" );
+            final BasicBinaryDomainCombination s3 = new BasicBinaryDomainCombination( "key0", "cc" );
+            final BasicBinaryDomainCombination s4 = new BasicBinaryDomainCombination( "c", "key0" );
+            if ( !cd0.toBinaryDomainCombinations().contains( s0 ) ) {
+                return false;
+            }
+            if ( !cd0.toBinaryDomainCombinations().contains( s1 ) ) {
+                return false;
+            }
+            if ( !cd0.toBinaryDomainCombinations().contains( s2 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s3 ) ) {
+                return false;
+            }
+            if ( !cd0.toBinaryDomainCombinations().contains( s4 ) ) {
+                return false;
+            }
+            final Domain key1 = new BasicDomain( "key1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain a1 = new BasicDomain( "a1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b1 = new BasicDomain( "b1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c1 = new BasicDomain( "c1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains cd1 = new BasicCombinableDomains( key1.getDomainId(), new BasicSpecies( "eel" ) );
+            cd1.addCombinableDomain( a1.getDomainId() );
+            cd1.addCombinableDomain( b1.getDomainId() );
+            cd1.addCombinableDomain( c1.getDomainId() );
+            cd1.addCombinableDomain( key1.getDomainId() );
+            if ( cd1.getNumberOfCombinableDomains() != 4 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( a1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( b1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( c1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( key1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getAllDomains().size() != 4 ) {
+                return false;
+            }
+            if ( cd1.toBinaryDomainCombinations().size() != 4 ) {
+                return false;
+            }
+            final BasicBinaryDomainCombination kk = new BasicBinaryDomainCombination( "key1", "key1" );
+            if ( !cd1.toBinaryDomainCombinations().contains( kk ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
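+    // Checks CombinationsBasedPairwiseDomainSimilarityCalculator: the expected score is
+    // (shared combinable domains) / (shared + different), e.g. 1/(1+2), 3/(0+3), 3/(3+3);
+    // getDifferenceInCounts() is the difference of the two distinct-domain counts, and
+    // adding the same partner domain twice does not change the result.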
+    private static boolean testCombinationsBasedPairwiseSimilarityCalculator() {
+        try {
+            final Domain a = new BasicDomain( "A", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "B", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "C", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain one_key = new BasicDomain( "bcl2", 4, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain two_key = new BasicDomain( "bcl2", 5, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains one = new BasicCombinableDomains( one_key.getDomainId(), new BasicSpecies( "mouse" ) );
+            final CombinableDomains two = new BasicCombinableDomains( two_key.getDomainId(),
+                                                                      new BasicSpecies( "rabbit" ) );
+            one.addCombinableDomain( a.getDomainId() );
+            one.addCombinableDomain( a.getDomainId() );
+            two.addCombinableDomain( new BasicDomain( "A", 1, 5, ( short ) 1, ( short ) 4, 0.1, -12 ).getDomainId() );
+            two.addCombinableDomain( b.getDomainId() );
+            two.addCombinableDomain( c.getDomainId() );
+            final PairwiseDomainSimilarityCalculator calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
+            final PairwiseDomainSimilarity s1 = calc.calculateSimilarity( one, two );
+            if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 / ( 1 + 2 ) ) ) {
+                return false;
+            }
+            if ( s1.getDifferenceInCounts() != ( 1 - 3 ) ) {
+                return false;
+            }
+            if ( ( ( CombinationsBasedPairwiseDomainSimilarity ) s1 ).getNumberOfDifferentDomains() != 2 ) {
+                return false;
+            }
+            one.addCombinableDomain( b.getDomainId() );
+            one.addCombinableDomain( c.getDomainId() );
+            final PairwiseDomainSimilarity s2 = calc.calculateSimilarity( one, two );
+            if ( !TestSurfacing.isEqual( s2.getSimilarityScore(), 3.0 / ( 0 + 3 ) ) ) {
+                return false;
+            }
+            if ( s2.getDifferenceInCounts() != 0 ) {
+                return false;
+            }
+            if ( ( ( CombinationsBasedPairwiseDomainSimilarity ) s2 ).getNumberOfDifferentDomains() != 0 ) {
+                return false;
+            }
+            final Domain d = new BasicDomain( "D", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain e = new BasicDomain( "E", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain f = new BasicDomain( "F", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            one.addCombinableDomain( d.getDomainId() );
+            one.addCombinableDomain( d.getDomainId() );
+            one.addCombinableDomain( e.getDomainId() );
+            one.addCombinableDomain( f.getDomainId() );
+            final PairwiseDomainSimilarity s3 = calc.calculateSimilarity( one, two );
+            if ( !TestSurfacing.isEqual( s3.getSimilarityScore(), 3.0 / ( 3 + 3 ) ) ) {
+                return false;
+            }
+            if ( s3.getDifferenceInCounts() != ( 6 - 3 ) ) {
+                return false;
+            }
+            if ( ( ( CombinationsBasedPairwiseDomainSimilarity ) s3 ).getNumberOfDifferentDomains() != 3 ) {
+                return false;
+            }
+            final Domain aaa = new BasicDomain( "aaa", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain bbb = new BasicDomain( "bbb", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain three_key = new BasicDomain( "bcl2", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain four_key = new BasicDomain( "bcl2", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains three = new BasicCombinableDomains( three_key.getDomainId(),
+                                                                        new BasicSpecies( "mouse" ) );
+            final CombinableDomains four = new BasicCombinableDomains( four_key.getDomainId(),
+                                                                       new BasicSpecies( "rabbit" ) );
+            three.addCombinableDomain( aaa.getDomainId() );
+            four.addCombinableDomain( bbb.getDomainId() );
+            final PairwiseDomainSimilarityCalculator calc2 = new CombinationsBasedPairwiseDomainSimilarityCalculator();
+            final PairwiseDomainSimilarity s4 = calc2.calculateSimilarity( three, four );
+            if ( !TestSurfacing.isEqual( s4.getSimilarityScore(), 0.0 / ( 0 + 2 ) ) ) {
+                return false;
+            }
+            final Domain aaa2 = new BasicDomain( "aaa", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            four.addCombinableDomain( aaa2.getDomainId() );
+            final PairwiseDomainSimilarity s5 = calc.calculateSimilarity( three, four );
+            if ( !TestSurfacing.isEqual( s5.getSimilarityScore(), 1.0 / ( 1 + 1 ) ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
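+    // Checks DomainCountsBasedPairwiseSimilarityCalculator: with key domain copy numbers
+    // c1 and c2, the expected score is 1 - |c1 - c2| / (c1 + c2) (e.g. 1 - 1/5, 1.0,
+    // 1 - 999/1001) and getDifferenceInCounts() returns c1 - c2.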
+    private static boolean testCopyNumberBasedPairwiseSimilarityCalculator() {
+        try {
+            final Domain one_key = new BasicDomain( "bcl2", 4, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain two_key = new BasicDomain( "bcl2", 5, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains one = new BasicCombinableDomains( one_key.getDomainId(), new BasicSpecies( "mouse" ) );
+            final CombinableDomains two = new BasicCombinableDomains( two_key.getDomainId(),
+                                                                      new BasicSpecies( "rabbit" ) );
+            one.setKeyDomainCount( 2 );
+            two.setKeyDomainCount( 3 );
+            final PairwiseDomainSimilarityCalculator calc = new DomainCountsBasedPairwiseSimilarityCalculator();
+            PairwiseDomainSimilarity s1 = calc.calculateSimilarity( one, two );
+            if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 - ( 3 - 2.0 ) / ( 2 + 3 ) ) ) {
+                return false;
+            }
+            if ( s1.getDifferenceInCounts() != ( 2 - 3 ) ) {
+                return false;
+            }
+            one.setKeyDomainCount( 1 );
+            two.setKeyDomainCount( 1 );
+            s1 = calc.calculateSimilarity( one, two );
+            if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 ) ) {
+                return false;
+            }
+            if ( s1.getDifferenceInCounts() != ( 1 - 1 ) ) {
+                return false;
+            }
+            one.setKeyDomainCount( 1 );
+            two.setKeyDomainCount( 1000 );
+            s1 = calc.calculateSimilarity( one, two );
+            if ( !TestSurfacing.isEqual( s1.getSimilarityScore(), 1.0 - 999.0 / 1001 ) ) {
+                return false;
+            }
+            if ( s1.getDifferenceInCounts() != ( 1 - 1000 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
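+    // Same setup as testCombinableDomains(), but using DirectedCombinableDomains and
+    // DirectedBinaryDomainCombination: direction now matters, so key0->a, key0->b and
+    // key0->c are expected to be present while the reversed combinations (a->key0,
+    // b->key0, c->key0) are not; a key1->key1 self-combination is still produced.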
+    private static boolean testDirectedCombinableDomains() {
+        try {
+            final Domain key0 = new BasicDomain( "key0", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains cd0 = new DirectedCombinableDomains( key0.getDomainId(), new BasicSpecies( "eel" ) );
+            cd0.addCombinableDomain( a.getDomainId() );
+            cd0.addCombinableDomain( b.getDomainId() );
+            cd0.addCombinableDomain( b.getDomainId() );
+            cd0.addCombinableDomain( c.getDomainId() );
+            cd0.addCombinableDomain( c.getDomainId() );
+            cd0.addCombinableDomain( c.getDomainId() );
+            if ( cd0.getNumberOfCombinableDomains() != 3 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( a.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( b.getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( c.getDomainId() ) != 3 ) {
+                return false;
+            }
+            if ( cd0.getNumberOfProteinsExhibitingCombination( key0.getDomainId() ) != 0 ) {
+                return false;
+            }
+            if ( cd0.getAllDomains().size() != 4 ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( c.getDomainId() ) ) {
+                return false;
+            }
+            if ( !cd0.getAllDomains().contains( key0.getDomainId() ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().size() != 3 ) {
+                return false;
+            }
+            final BinaryDomainCombination s0 = new DirectedBinaryDomainCombination( "key0", "a" );
+            final BinaryDomainCombination s1 = new DirectedBinaryDomainCombination( "b", "key0" );
+            final BinaryDomainCombination s2 = new DirectedBinaryDomainCombination( "key0", "c" );
+            final BinaryDomainCombination s3 = new DirectedBinaryDomainCombination( "key0", "cc" );
+            final BinaryDomainCombination s4 = new DirectedBinaryDomainCombination( "a", "b" );
+            final BinaryDomainCombination s5 = new DirectedBinaryDomainCombination( "b", "a" );
+            final BinaryDomainCombination s6 = new DirectedBinaryDomainCombination( "key0", "b" );
+            final BinaryDomainCombination s7 = new DirectedBinaryDomainCombination( "a", "key0" );
+            final BinaryDomainCombination s8 = new DirectedBinaryDomainCombination( "c", "key0" );
+            if ( !cd0.toBinaryDomainCombinations().contains( s0 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s1 ) ) {
+                return false;
+            }
+            if ( !cd0.toBinaryDomainCombinations().contains( s2 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s3 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s4 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s5 ) ) {
+                return false;
+            }
+            if ( !cd0.toBinaryDomainCombinations().contains( s6 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s7 ) ) {
+                return false;
+            }
+            if ( cd0.toBinaryDomainCombinations().contains( s8 ) ) {
+                return false;
+            }
+            final Domain key1 = new BasicDomain( "key1", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain a1 = new BasicDomain( "a1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b1 = new BasicDomain( "b1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c1 = new BasicDomain( "c1", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final CombinableDomains cd1 = new DirectedCombinableDomains( key1.getDomainId(), new BasicSpecies( "eel" ) );
+            cd1.addCombinableDomain( a1.getDomainId() );
+            cd1.addCombinableDomain( b1.getDomainId() );
+            cd1.addCombinableDomain( c1.getDomainId() );
+            cd1.addCombinableDomain( key1.getDomainId() );
+            if ( cd1.getNumberOfCombinableDomains() != 4 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( a1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( b1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( c1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getNumberOfProteinsExhibitingCombination( key1.getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd1.getAllDomains().size() != 4 ) {
+                return false;
+            }
+            if ( cd1.toBinaryDomainCombinations().size() != 4 ) {
+                return false;
+            }
+            final BinaryDomainCombination kk = new DirectedBinaryDomainCombination( "key1", "key1" );
+            if ( !cd1.toBinaryDomainCombinations().contains( kk ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
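+    // Checks directed genome-wide combinable domains: with DomainCombinationType.DIRECTED,
+    // binary combinations are expected to follow the order of domains within each protein
+    // (A->B but not B->A, and no A->A self-combination here), both directions appear only
+    // when both orders occur (X/Y, M/N), and no combinations are formed across proteins
+    // (no XX->YY from the single-domain proteins "four" and "five").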
+    private static boolean testDirectedness() {
+        try {
+            final BinaryStates X = BinaryStates.PRESENT;
+            final BinaryStates O = BinaryStates.ABSENT;
+            final GainLossStates G = GainLossStates.GAIN;
+            final GainLossStates L = GainLossStates.LOSS;
+            final GainLossStates A = GainLossStates.UNCHANGED_ABSENT;
+            final GainLossStates P = GainLossStates.UNCHANGED_PRESENT;
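+            // NOTE: the single-letter state constants above are only referenced by the
+            // commented-out parsimony checks at the end of this method.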
+            final Protein one_1 = new BasicProtein( "one", "1" );
+            final Protein two_1 = new BasicProtein( "two", "1" );
+            final Protein three_1 = new BasicProtein( "three", "1" );
+            final Protein four_1 = new BasicProtein( "four", "1" );
+            final Protein five_1 = new BasicProtein( "five", "1" );
+            one_1.addProteinDomain( new BasicDomain( "B", 12, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "C", 13, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "A", 11, 12, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "X", 100, 110, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "Y", 200, 210, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "A", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "B", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "Y", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "X", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "P", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "M", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "M", 5, 6, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "N", 7, 8, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "N", 3, 4, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            four_1.addProteinDomain( new BasicDomain( "XX", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            five_1.addProteinDomain( new BasicDomain( "YY", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            final List<Protein> list_1 = new ArrayList<Protein>();
+            list_1.add( one_1 );
+            list_1.add( two_1 );
+            list_1.add( three_1 );
+            list_1.add( four_1 );
+            list_1.add( five_1 );
+            final GenomeWideCombinableDomains gwcd_1 = BasicGenomeWideCombinableDomains
+                    .createInstance( list_1, false, new BasicSpecies( "1" ), DomainCombinationType.DIRECTED );
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "B" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "B", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "A" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "C" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "C", "A" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "B", "C" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "C", "X" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "C", "Y" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "X" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "Y" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "Y", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "X", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "C", "B" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "X", "Y" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "Y", "X" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "Y" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "A", "X" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "Y", "C" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "M", "N" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "N", "M" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "N", "P" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "M", "P" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "P", "N" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "P", "M" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "XX", "YY" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "YY", "XX" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations().contains( new DirectedBinaryDomainCombination( "B", "B" ) ) ) {
+                return false;
+            }
+            //            final List<GenomeWideCombinableDomains> gwcd_list = new ArrayList<GenomeWideCombinableDomains>();
+            //            gwcd_list.add( gwcd_1 );
+            //            gwcd_list.add( gwcd_2 );
+            //            final CharacterStateMatrix<BinaryStates> matrix_d = DomainParsimonyCalculator
+            //                    .createMatrixOfDomainPresenceOrAbsence( gwcd_list );
+            //            final CharacterStateMatrix<BinaryStates> matrix_bc = DomainParsimonyCalculator
+            //                    .createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list );
+            //            if ( matrix_d.getState( 0, 0 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 0, 0 ) != X ) {
+            //                return false;
+            //            }
+            //        
+            //
+            //            final BasicCharacterStateMatrix<BinaryStates> dm = new BasicCharacterStateMatrix<BinaryStates>( new BinaryStates[][] {
+            //                    { X, X, X, X, X, X }, { X, X, X, X, X, X } } );
+            //            if ( !matrix_d.equals( dm ) ) {
+            //                return false;
+            //            }
+            //            final BasicCharacterStateMatrix<BinaryStates> bcm = new BasicCharacterStateMatrix<BinaryStates>( new BinaryStates[][] {
+            //                    { X, O, X, X, X, X, O, X, X, O, X, X }, { X, X, X, O, O, O, O, X, O, O, X, X } } );
+            //            if ( !matrix_bc.equals( bcm ) ) {
+            //                return false;
+            //            }
+            // --------------------------------------------------------------------
+            //            final List<GenomeWideCombinableDomains> gwcd_list = new ArrayList<GenomeWideCombinableDomains>();
+            //            gwcd_list.add( one );
+            //            gwcd_list.add( two );
+            //            gwcd_list.add( three );
+            //            gwcd_list.add( four );
+            //            final CharacterStateMatrix<BinaryStates> matrix_d = DomainParsimony
+            //                    .createMatrixOfDomainPresenceOrAbsence( gwcd_list );
+            //            final CharacterStateMatrix<BinaryStates> matrix_bc = DomainParsimony
+            //                    .createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list );
+            //            //         System.out.println( "d:"  );
+            //            //         System.out.println(matrix_d.toStringBuffer().toString()  );
+            //            //         System.out.println( "bc:"  );
+            //            //        System.out.println(matrix_bc.toStringBuffer().toString()  );
+            //            // 1 a b c e f g h l m
+            //            // 2 a b c e f g i n o
+            //            // 3 a b d e f g j p q
+            //            // 4 a b d p r
+            //            if ( matrix_d.getState( 0, 0 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 1 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 2 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 3 ) != O ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 4 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 5 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 6 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 7 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_d.getState( 0, 8 ) != O ) {
+            //                return false;
+            //            }
+            //            // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m
+            //            // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o
+            //            // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q
+            //            // 4 a-b a-d p-r
+            //            if ( matrix_bc.getState( 0, 0 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 0, 1 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 0, 2 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 0, 3 ) != O ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 0, 4 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 1, 0 ) != O ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 1, 1 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 1, 2 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 1, 3 ) != O ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 1, 4 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 2, 0 ) != O ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 2, 1 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 2, 2 ) != O ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 2, 3 ) != X ) {
+            //                return false;
+            //            }
+            //            if ( matrix_bc.getState( 2, 4 ) != X ) {
+            //                return false;
+            //            }
+            //            final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance();
+            //            final String p0_str = "((one,two)1-2,(three,four)3-4)root";
+            //            final Phylogeny p0 = factory0.create( p0_str, new NHXParser() )[ 0 ];
+            //            final DomainParsimony dp0 = DomainParsimony.createInstance( p0, gwcd_list );
+            //            dp0.executeDolloParsimonyOnDomainPresence();
+            //            final CharacterStateMatrix<GainLossStates> gl_matrix_d = dp0.getGainLossMatrix();
+            //            final CharacterStateMatrix<BinaryStates> is_matrix_d = dp0.getInternalStatesMatrix();
+            //            dp0.executeDolloParsimonyOnBinaryDomainCombintionPresence();
+            //            final CharacterStateMatrix<GainLossStates> gl_matrix_bc = dp0.getGainLossMatrix();
+            //            final CharacterStateMatrix<BinaryStates> is_matrix_bc = dp0.getInternalStatesMatrix();
+            //            if ( is_matrix_d.getState( "root", "A" ) != X ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_d.getState( "root", "B" ) != X ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_d.getState( "root", "C" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_d.getState( "root", "D" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_d.getState( "root", "E" ) != X ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "root", "A=A" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "root", "A=B" ) != X ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "root", "A=C" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "root", "A=D" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "root", "G=H" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "1-2", "G=H" ) != O ) {
+            //                return false;
+            //            }
+            //            if ( is_matrix_bc.getState( "root", "E=F" ) != X ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_bc.getState( "root", "E=F" ) != P ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_bc.getState( "root", "A=A" ) != A ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_bc.getState( "one", "A=A" ) != G ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_bc.getState( "root", "A=B" ) != P ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_bc.getState( "3-4", "A=D" ) != G ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_bc.getState( "four", "E=F" ) != L ) {
+            //                return false;
+            //            }
+            //            if ( gl_matrix_d.getState( "3-4", "P" ) != G ) {
+            //                return false;
+            //            }
+            //            final Protein ab_1 = new BasicProtein( "ab", "one" );
+            //            ab_1.addProteinDomain( a );
+            //            ab_1.addProteinDomain( b );
+            //            final Protein ac_1 = new BasicProtein( "ac", "one" );
+            //            ac_1.addProteinDomain( a );
+            //            ac_1.addProteinDomain( c );
+            //            final Protein de_1 = new BasicProtein( "de", "one" );
+            //            de_1.addProteinDomain( d );
+            //            de_1.addProteinDomain( e );
+            //            final Protein ac_2 = new BasicProtein( "ac", "two" );
+            //            ac_2.addProteinDomain( a );
+            //            ac_2.addProteinDomain( c );
+            //            final Protein ab_3 = new BasicProtein( "ab", "three" );
+            //            ab_3.addProteinDomain( a );
+            //            ab_3.addProteinDomain( b );
+            //            final Protein de_4 = new BasicProtein( "de", "four" );
+            //            de_4.addProteinDomain( d );
+            //            de_4.addProteinDomain( e );
+            //            final Protein ab_6 = new BasicProtein( "ab", "six" );
+            //            ab_6.addProteinDomain( a );
+            //            ab_6.addProteinDomain( b );
+            //            final List<Protein> spec_one = new ArrayList<Protein>();
+            //            final List<Protein> spec_two = new ArrayList<Protein>();
+            //            final List<Protein> spec_three = new ArrayList<Protein>();
+            //            final List<Protein> spec_four = new ArrayList<Protein>();
+            //            final List<Protein> spec_five = new ArrayList<Protein>();
+            //            final List<Protein> spec_six = new ArrayList<Protein>();
+            //            final List<Protein> spec_seven = new ArrayList<Protein>();
+            //            spec_one.add( ab_1 );
+            //            spec_one.add( ac_1 );
+            //            spec_one.add( de_1 );
+            //            spec_two.add( ac_2 );
+            //            spec_three.add( ab_3 );
+            //            spec_four.add( de_4 );
+            //            spec_six.add( ab_6 );
+            //            final GenomeWideCombinableDomains one_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_one, false, new BasicSpecies( "one" ), false );
+            //            final GenomeWideCombinableDomains two_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_two, false, new BasicSpecies( "two" ), false );
+            //            final GenomeWideCombinableDomains three_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_three, false, new BasicSpecies( "three" ), false );
+            //            final GenomeWideCombinableDomains four_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_four, false, new BasicSpecies( "four" ), false );
+            //            final GenomeWideCombinableDomains five_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_five, false, new BasicSpecies( "five" ), false );
+            //            final GenomeWideCombinableDomains six_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_six, false, new BasicSpecies( "six" ), false );
+            //            final GenomeWideCombinableDomains seven_gwcd = BasicGenomeWideCombinableDomains
+            //                    .createInstance( spec_seven, false, new BasicSpecies( "seven" ), false
+            //                                    );
+            //            final List<GenomeWideCombinableDomains> gwcd_list1 = new ArrayList<GenomeWideCombinableDomains>();
+            //            gwcd_list1.add( one_gwcd );
+            //            gwcd_list1.add( two_gwcd );
+            //            gwcd_list1.add( three_gwcd );
+            //            gwcd_list1.add( four_gwcd );
+            //            gwcd_list1.add( five_gwcd );
+            //            gwcd_list1.add( six_gwcd );
+            //            gwcd_list1.add( seven_gwcd );
+            //            final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance();
+            //            final String p1_str = "(((((one,two)12,three)123,(four,five)45)12345,six)123456,seven)root";
+            //            final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ];
+            //            final DomainParsimony dp1 = DomainParsimony.createInstance( p1, gwcd_list1 );
+            //            dp1.executeDolloParsimonyOnDomainPresence();
+            //            final CharacterStateMatrix<GainLossStates> gl_dollo_d = dp1.getGainLossMatrix();
+            //            final CharacterStateMatrix<BinaryStates> i_dollo_d = dp1.getInternalStatesMatrix();
+            //            if ( dp1.getCost() != 14 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalGains() != 5 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalLosses() != 9 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalUnchanged() != 51 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getNetGainsOnNode( "45" ) != -2 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfGainsOnNode( "45" ) != 0 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfLossesOnNode( "45" ) != 2 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedOnNode( "45" ) != 3 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedPresentOnNode( "45" ) != 2 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedAbsentOnNode( "45" ) != 1 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getUnitsGainedOnNode( "45" ).contains( "A" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsLostOnNode( "45" ).contains( "A" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsLostOnNode( "45" ).contains( "B" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsGainedOnNode( "12345" ).contains( "D" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsOnNode( "12" ).contains( "A" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsOnNode( "12" ).contains( "B" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsOnNode( "12" ).contains( "C" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsOnNode( "12" ).contains( "D" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsOnNode( "12" ).contains( "E" ) ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getNetGainsOnNode( "123456" ) != 2 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfGainsOnNode( "123456" ) != 2 ) {
+            //                return false;
+            //            }
+            //            dp1.executeDolloParsimonyOnBinaryDomainCombintionPresence();
+            //            final CharacterStateMatrix<GainLossStates> gl_dollo_bc = dp1.getGainLossMatrix();
+            //            final CharacterStateMatrix<BinaryStates> i_dollo_bc = dp1.getInternalStatesMatrix();
+            //            if ( dp1.getCost() != 8 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalGains() != 3 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalLosses() != 5 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalUnchanged() != 31 ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsLostOnNode( "45" ).contains( "A=B" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsGainedOnNode( "12345" ).contains( "D=E" ) ) {
+            //                return false;
+            //            }
+            //            dp1.executeFitchParsimonyOnDomainPresence();
+            //            final CharacterStateMatrix<GainLossStates> gl_fitch_d = dp1.getGainLossMatrix();
+            //            final CharacterStateMatrix<BinaryStates> i_fitch_d = dp1.getInternalStatesMatrix();
+            //            if ( dp1.getCost() != 10 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalGains() != 7 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalLosses() != 3 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalUnchanged() != 55 ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsGainedOnNode( "four" ).contains( "E" ) ) {
+            //                return false;
+            //            }
+            //            dp1.executeFitchParsimonyOnBinaryDomainCombintion();
+            //            final CharacterStateMatrix<GainLossStates> gl_fitch_bc = dp1.getGainLossMatrix();
+            //            final CharacterStateMatrix<BinaryStates> i_fitch_bc = dp1.getInternalStatesMatrix();
+            //            if ( dp1.getCost() != 6 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalGains() != 4 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalLosses() != 2 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getTotalUnchanged() != 33 ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsLostOnNode( "45" ).contains( "A=B" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsGainedOnNode( "four" ).contains( "D=E" ) ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getNetGainsOnNode( "two" ) != -1 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getNetGainsOnNode( "123" ) != 0 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedPresentOnNode( "123" ) != 1 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedAbsentOnNode( "123" ) != 2 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedOnNode( "123" ) != 3 ) {
+            //                return false;
+            //            }
+            //            if ( dp1.getSumOfUnchangedOnNode( "two" ) != 2 ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsUnchangedAbsentOnNode( "two" ).contains( "D=E" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsUnchangedPresentOnNode( "two" ).contains( "A=C" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsUnchangedAbsentOnNode( "123" ).contains( "A=C" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsUnchangedPresentOnNode( "123" ).contains( "A=B" ) ) {
+            //                return false;
+            //            }
+            //            if ( !dp1.getUnitsUnchangedAbsentOnNode( "123" ).contains( "D=E" ) ) {
+            //                return false;
+            //            }
+            //            CharacterStateMatrix<BinaryStates> bsm = null;
+            //            CharacterStateMatrix<GainLossStates> glm = null;
+            //            bsm = new BasicCharacterStateMatrix<BinaryStates>( new BinaryStates[][] { { X, X, X, X, X },
+            //                    { X, X, O, X, X }, { O, O, O, X, X }, { X, X, O, X, X }, { X, X, O, O, O }, { O, O, O, O, O } } );
+            //            if ( !bsm.equals( i_dollo_d ) ) {
+            //                return false;
+            //            }
+            //            bsm = new BasicCharacterStateMatrix<BinaryStates>( new BinaryStates[][] { { X, X, X, O, O },
+            //                    { X, X, O, O, O }, { O, O, O, O, O }, { X, X, O, O, O }, { X, X, O, O, O }, { O, O, O, O, O } } );
+            //            if ( !bsm.equals( i_fitch_d ) ) {
+            //                return false;
+            //            }
+            //            glm = new BasicCharacterStateMatrix<GainLossStates>( new GainLossStates[][] { { P, P, P, P, P },
+            //                    { P, L, P, L, L }, { P, P, G, P, P }, { P, P, A, L, L }, { P, P, A, P, P }, { A, A, A, P, P },
+            //                    { A, A, A, L, L }, { L, L, A, P, P }, { P, P, A, G, G }, { P, P, A, A, A }, { G, G, A, A, A },
+            //                    { A, A, A, A, A }, { A, A, A, A, A } } );
+            //            if ( !glm.equals( gl_dollo_d ) ) {
+            //                return false;
+            //            }
+            //            glm = new BasicCharacterStateMatrix<GainLossStates>( new GainLossStates[][] { { P, P, P, G, G },
+            //                    { P, L, P, A, A }, { P, P, G, A, A }, { P, P, A, A, A }, { P, P, A, A, A }, { A, A, A, G, G },
+            //                    { A, A, A, A, A }, { L, L, A, A, A }, { P, P, A, A, A }, { P, P, A, A, A }, { G, G, A, A, A },
+            //                    { A, A, A, A, A }, { A, A, A, A, A } } );
+            //            if ( !glm.equals( gl_fitch_d ) ) {
+            //                return false;
+            //            }
+            //            bsm = new BasicCharacterStateMatrix<BinaryStates>( new BinaryStates[][] { { X, X, X }, { X, O, X },
+            //                    { O, O, X }, { X, O, X }, { X, O, O }, { O, O, O } } );
+            //            if ( !bsm.equals( i_dollo_bc ) ) {
+            //                return false;
+            //            }
+            //            bsm = new BasicCharacterStateMatrix<BinaryStates>( new BinaryStates[][] { { X, X, O }, { X, O, O },
+            //                    { O, O, O }, { X, O, O }, { X, O, O }, { O, O, O } } );
+            //            if ( !bsm.equals( i_fitch_bc ) ) {
+            //                return false;
+            //            }
+            //            glm = new BasicCharacterStateMatrix<GainLossStates>( new GainLossStates[][] { { P, P, P }, { L, P, L },
+            //                    { P, G, P }, { P, A, L }, { P, A, P }, { A, A, P }, { A, A, L }, { L, A, P }, { P, A, G },
+            //                    { P, A, A }, { G, A, A }, { A, A, A }, { A, A, A } } );
+            //            if ( !glm.equals( gl_dollo_bc ) ) {
+            //                return false;
+            //            }
+            //            glm = new BasicCharacterStateMatrix<GainLossStates>( new GainLossStates[][] { { P, P, G }, { L, P, A },
+            //                    { P, G, A }, { P, A, A }, { P, A, A }, { A, A, G }, { A, A, A }, { L, A, A }, { P, A, A },
+            //                    { P, A, A }, { G, A, A }, { A, A, A }, { A, A, A } } );
+            //            if ( !glm.equals( gl_fitch_bc ) ) {
+            //                return false;
+            //            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
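+    // Tests that genome-wide combinable domains of type DIRECTED_ADJACTANT only
+    // report combinations of domains that are immediately adjacent within a
+    // protein, in the stated direction.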
+    private static boolean testDirectednessAndAdjacency() {
+        try {
+            final Protein one_1 = new BasicProtein( "one", "1" );
+            final Protein two_1 = new BasicProtein( "two", "1" );
+            final Protein three_1 = new BasicProtein( "three", "1" );
+            final Protein four_1 = new BasicProtein( "four", "1" );
+            final Protein five_1 = new BasicProtein( "five", "1" );
+            one_1.addProteinDomain( new BasicDomain( "B", 12, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "C", 13, 14, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "A", 11, 12, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "X", 100, 110, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            one_1.addProteinDomain( new BasicDomain( "Y", 200, 210, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "A", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "B", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "Y", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            two_1.addProteinDomain( new BasicDomain( "X", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "P", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "M", 1, 2, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "M", 5, 6, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "N", 7, 8, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            three_1.addProteinDomain( new BasicDomain( "N", 3, 4, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            four_1.addProteinDomain( new BasicDomain( "XX", 10, 20, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            five_1.addProteinDomain( new BasicDomain( "YY", 30, 40, ( short ) 1, ( short ) 4, 0.1, -12 ) );
+            final List<Protein> list_1 = new ArrayList<Protein>();
+            list_1.add( one_1 );
+            list_1.add( two_1 );
+            list_1.add( three_1 );
+            list_1.add( four_1 );
+            list_1.add( five_1 );
+            final GenomeWideCombinableDomains gwcd_1 = BasicGenomeWideCombinableDomains
+                    .createInstance( list_1, false, new BasicSpecies( "1" ), DomainCombinationType.DIRECTED_ADJACTANT );
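+            // Only immediately neighboring domains (in sequence order) should form
+            // combinations: e.g. A=B and B=C in protein "one", but not B=A or A=C.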
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "A", "B" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "B", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "A", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "A", "C" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "C", "A" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "B", "C" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "C", "X" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "C", "Y" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "X", "Y" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "A", "X" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "A", "Y" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "Y", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "X", "A" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "C", "B" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "Y", "X" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "Y", "C" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "M", "N" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "N", "M" ) ) ) {
+                return false;
+            }
+            if ( !gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "N", "P" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "M", "P" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "P", "N" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "P", "M" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "XX", "YY" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "YY", "XX" ) ) ) {
+                return false;
+            }
+            if ( gwcd_1.toBinaryDomainCombinations()
+                    .contains( new AdjactantDirectedBinaryDomainCombination( "B", "B" ) ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
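+    // Tests domain- and binary domain combination-based similarity scores between
+    // pairs of small artificial genomes, with and without ignoring combinations of
+    // a domain with itself, and with user-defined domain ids to ignore.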
+    private static boolean testDomainArchitectureBasedGenomeSimilarityCalculator() {
+        try {
+            final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain d = new BasicDomain( "d", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain e = new BasicDomain( "e", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain f = new BasicDomain( "f", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain g = new BasicDomain( "g", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain h = new BasicDomain( "h", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain i = new BasicDomain( "i", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain j = new BasicDomain( "j", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain k = new BasicDomain( "k", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain l = new BasicDomain( "l", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain m = new BasicDomain( "m", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain n = new BasicDomain( "n", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Protein eel_0 = new BasicProtein( "0", "eel" );
+            final Protein eel_1 = new BasicProtein( "1", "eel" );
+            final Protein eel_2 = new BasicProtein( "2", "eel" );
+            final Protein eel_3 = new BasicProtein( "3", "eel" );
+            final Protein eel_4 = new BasicProtein( "4", "eel" );
+            final Protein eel_5 = new BasicProtein( "5", "eel" );
+            final Protein eel_6 = new BasicProtein( "6", "eel" );
+            final Protein rat_0 = new BasicProtein( "0", "rat" );
+            final Protein rat_1 = new BasicProtein( "1", "rat" );
+            final Protein rat_2 = new BasicProtein( "2", "rat" );
+            final Protein rat_3 = new BasicProtein( "3", "rat" );
+            final Protein rat_4 = new BasicProtein( "4", "rat" );
+            final Protein rat_5 = new BasicProtein( "5", "rat" );
+            final Protein rat_6 = new BasicProtein( "6", "rat" );
+            final Protein rat_7 = new BasicProtein( "7", "rat" );
+            eel_1.addProteinDomain( a );
+            eel_2.addProteinDomain( a );
+            eel_2.addProteinDomain( b );
+            eel_3.addProteinDomain( a );
+            eel_3.addProteinDomain( a );
+            eel_3.addProteinDomain( b );
+            eel_4.addProteinDomain( a );
+            eel_4.addProteinDomain( b );
+            eel_4.addProteinDomain( c );
+            eel_4.addProteinDomain( d );
+            eel_4.addProteinDomain( e );
+            eel_5.addProteinDomain( e );
+            eel_5.addProteinDomain( e );
+            eel_5.addProteinDomain( f );
+            eel_5.addProteinDomain( f );
+            eel_5.addProteinDomain( f );
+            eel_5.addProteinDomain( f );
+            eel_6.addProteinDomain( g );
+            eel_6.addProteinDomain( h );
+            rat_1.addProteinDomain( a );
+            rat_2.addProteinDomain( a );
+            rat_2.addProteinDomain( b );
+            rat_3.addProteinDomain( a );
+            rat_3.addProteinDomain( a );
+            rat_3.addProteinDomain( b );
+            rat_4.addProteinDomain( a );
+            rat_4.addProteinDomain( b );
+            rat_4.addProteinDomain( c );
+            rat_4.addProteinDomain( i );
+            rat_4.addProteinDomain( l );
+            rat_5.addProteinDomain( i );
+            rat_5.addProteinDomain( f );
+            rat_5.addProteinDomain( f );
+            rat_6.addProteinDomain( j );
+            rat_6.addProteinDomain( k );
+            rat_7.addProteinDomain( m );
+            rat_7.addProteinDomain( n );
+            final List<Protein> protein_list_eel = new ArrayList<Protein>();
+            protein_list_eel.add( eel_0 );
+            protein_list_eel.add( eel_1 );
+            protein_list_eel.add( eel_2 );
+            protein_list_eel.add( eel_3 );
+            protein_list_eel.add( eel_4 );
+            protein_list_eel.add( eel_5 );
+            protein_list_eel.add( eel_6 );
+            final List<Protein> protein_list_rat = new ArrayList<Protein>();
+            protein_list_rat.add( rat_0 );
+            protein_list_rat.add( rat_1 );
+            protein_list_rat.add( rat_2 );
+            protein_list_rat.add( rat_3 );
+            protein_list_rat.add( rat_4 );
+            protein_list_rat.add( rat_5 );
+            protein_list_rat.add( rat_6 );
+            protein_list_rat.add( rat_7 );
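+            // Instances created with the boolean flag set to true exclude combinations
+            // of a domain with itself (e.g. a=a, e=e, f=f) from the counts checked below.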
+            final GenomeWideCombinableDomains eel_not_ignore = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_eel, false, new BasicSpecies( "eel" ) );
+            final GenomeWideCombinableDomains eel_ignore = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_eel, true, new BasicSpecies( "eel" ) );
+            final GenomeWideCombinableDomains rat_not_ignore = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_rat, false, new BasicSpecies( "rat" ) );
+            final GenomeWideCombinableDomains rat_ignore = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_rat, true, new BasicSpecies( "rat" ) );
+            final DomainArchitectureBasedGenomeSimilarityCalculator calc_ni = new DomainArchitectureBasedGenomeSimilarityCalculator( eel_not_ignore,
+                                                                                                                                     rat_not_ignore );
+            final DomainArchitectureBasedGenomeSimilarityCalculator calc_i = new DomainArchitectureBasedGenomeSimilarityCalculator( eel_ignore,
+                                                                                                                                    rat_ignore );
+            if ( calc_ni.getAllDomains().size() != 14 ) {
+                return false;
+            }
+            if ( calc_i.getAllDomains().size() != 14 ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome0().size() != 4 ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome0().size() != 4 ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome1().size() != 6 ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome1().size() != 6 ) {
+                return false;
+            }
+            if ( calc_i.getSharedDomains().size() != 4 ) {
+                return false;
+            }
+            if ( calc_ni.getSharedDomains().size() != 4 ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome0().contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome0().contains( e.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome0().contains( g.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome0().contains( h.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome0().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome0().contains( i.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome0().contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome0().contains( e.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome0().contains( g.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome0().contains( h.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome0().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome0().contains( i.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome1().contains( i.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome1().contains( l.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome1().contains( j.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome1().contains( k.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome1().contains( m.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_ni.getDomainsSpecificToGenome1().contains( n.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome1().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome1().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_ni.getDomainsSpecificToGenome1().contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome1().contains( i.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome1().contains( l.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome1().contains( j.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome1().contains( k.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome1().contains( m.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getDomainsSpecificToGenome1().contains( n.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome1().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome1().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_i.getDomainsSpecificToGenome1().contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getSharedDomains().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getSharedDomains().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getSharedDomains().contains( c.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_i.getSharedDomains().contains( f.getDomainId() ) ) {
+                return false;
+            }
+            final Set<DomainId> all = calc_ni.getAllDomains();
+            if ( !all.contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( c.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( e.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( f.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( g.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( h.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( i.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( l.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( j.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( k.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( m.getDomainId() ) ) {
+                return false;
+            }
+            if ( !all.contains( n.getDomainId() ) ) {
+                return false;
+            }
+            final Set<BinaryDomainCombination> s_0_ni = calc_ni.getBinaryDomainCombinationsSpecificToGenome0();
+            final Set<BinaryDomainCombination> s_0_i = calc_i.getBinaryDomainCombinationsSpecificToGenome0();
+            final Set<BinaryDomainCombination> s_1_ni = calc_ni.getBinaryDomainCombinationsSpecificToGenome1();
+            final Set<BinaryDomainCombination> s_1_i = calc_i.getBinaryDomainCombinationsSpecificToGenome1();
+            final Set<BinaryDomainCombination> a_ni = calc_ni.getAllBinaryDomainCombinations();
+            final Set<BinaryDomainCombination> a_i = calc_i.getAllBinaryDomainCombinations();
+            final Set<BinaryDomainCombination> shared_ni = calc_ni.getSharedBinaryDomainCombinations();
+            final Set<BinaryDomainCombination> shared_i = calc_i.getSharedBinaryDomainCombinations();
+            if ( a_ni.size() != 25 ) {
+                return false;
+            }
+            if ( a_i.size() != 22 ) {
+                return false;
+            }
+            if ( s_0_ni.size() != 10 ) {
+                return false;
+            }
+            if ( s_0_i.size() != 9 ) {
+                return false;
+            }
+            if ( s_1_ni.size() != 10 ) {
+                return false;
+            }
+            if ( s_1_i.size() != 10 ) {
+                return false;
+            }
+            if ( shared_ni.size() != 5 ) {
+                return false;
+            }
+            if ( shared_i.size() != 3 ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "a", "a" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "b", "a" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "a", "c" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "a", "d" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "a", "e" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "b", "c" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "b", "d" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "b", "e" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "c", "d" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "c", "e" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "d", "e" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "e", "f" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "g", "h" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "f", "f" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "e", "e" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "a", "i" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "a", "l" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "b", "i" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "b", "l" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "c", "i" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "c", "l" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "i", "l" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "i", "f" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "m", "n" ) ) ) {
+                return false;
+            }
+            if ( !a_ni.contains( new BasicBinaryDomainCombination( "j", "k" ) ) ) {
+                return false;
+            }
+            if ( a_ni.contains( new BasicBinaryDomainCombination( "a", "g" ) ) ) {
+                return false;
+            }
+            if ( a_ni.contains( new BasicBinaryDomainCombination( "a", "m" ) ) ) {
+                return false;
+            }
+            if ( a_i.contains( new BasicBinaryDomainCombination( "a", "a" ) ) ) {
+                return false;
+            }
+            if ( a_i.contains( new BasicBinaryDomainCombination( "f", "f" ) ) ) {
+                return false;
+            }
+            if ( a_i.contains( new BasicBinaryDomainCombination( "e", "e" ) ) ) {
+                return false;
+            }
+            if ( !shared_ni.contains( new BasicBinaryDomainCombination( "a", "a" ) ) ) {
+                return false;
+            }
+            if ( !shared_ni.contains( new BasicBinaryDomainCombination( "a", "b" ) ) ) {
+                return false;
+            }
+            if ( !shared_ni.contains( new BasicBinaryDomainCombination( "a", "c" ) ) ) {
+                return false;
+            }
+            if ( !shared_ni.contains( new BasicBinaryDomainCombination( "b", "c" ) ) ) {
+                return false;
+            }
+            if ( !shared_ni.contains( new BasicBinaryDomainCombination( "f", "f" ) ) ) {
+                return false;
+            }
+            if ( shared_ni.contains( new BasicBinaryDomainCombination( "m", "n" ) ) ) {
+                return false;
+            }
+            if ( shared_i.contains( new BasicBinaryDomainCombination( "a", "a" ) ) ) {
+                return false;
+            }
+            if ( !shared_i.contains( new BasicBinaryDomainCombination( "a", "b" ) ) ) {
+                return false;
+            }
+            if ( !shared_i.contains( new BasicBinaryDomainCombination( "a", "c" ) ) ) {
+                return false;
+            }
+            if ( !shared_i.contains( new BasicBinaryDomainCombination( "b", "c" ) ) ) {
+                return false;
+            }
+            if ( shared_i.contains( new BasicBinaryDomainCombination( "f", "f" ) ) ) {
+                return false;
+            }
+            if ( shared_i.contains( new BasicBinaryDomainCombination( "m", "n" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "a", "d" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "a", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "b", "d" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "b", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "c", "d" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "c", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "d", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "e", "f" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "g", "h" ) ) ) {
+                return false;
+            }
+            if ( !s_0_ni.contains( new BasicBinaryDomainCombination( "e", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "a", "d" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "a", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "b", "d" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "b", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "c", "d" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "c", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "d", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "e", "f" ) ) ) {
+                return false;
+            }
+            if ( !s_0_i.contains( new BasicBinaryDomainCombination( "g", "h" ) ) ) {
+                return false;
+            }
+            if ( s_0_i.contains( new BasicBinaryDomainCombination( "e", "e" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "a", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "a", "l" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "b", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "b", "l" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "c", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "c", "l" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "l", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "i", "f" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "m", "n" ) ) ) {
+                return false;
+            }
+            if ( !s_1_ni.contains( new BasicBinaryDomainCombination( "j", "k" ) ) ) {
+                return false;
+            }
+            if ( s_1_ni.contains( new BasicBinaryDomainCombination( "a", "b" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "a", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "a", "l" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "b", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "b", "l" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "c", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "c", "l" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "l", "i" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "i", "f" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "m", "n" ) ) ) {
+                return false;
+            }
+            if ( !s_1_i.contains( new BasicBinaryDomainCombination( "j", "k" ) ) ) {
+                return false;
+            }
+            if ( s_1_i.contains( new BasicBinaryDomainCombination( "a", "b" ) ) ) {
+                return false;
+            }
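+            // The expected similarity scores reduce to |shared| / |all|, written here
+            // as 1 - ( |all| - |shared| ) / |all| to match the set sizes checked above.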
+            if ( !isEqual( calc_ni.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(),
+                           1.0 - ( 25.0 - 5.0 ) / 25.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_i.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(),
+                           1.0 - ( 22.0 - 3.0 ) / 22.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_ni.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( 14.0 - 4.0 ) / 14.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_i.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( 14.0 - 4.0 ) / 14.0 ) ) {
+                return false;
+            }
+            final Domain u = new BasicDomain( "u", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain v = new BasicDomain( "v", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain w = new BasicDomain( "w", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain x = new BasicDomain( "x", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain y = new BasicDomain( "y", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain z = new BasicDomain( "z", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Protein a_0 = new BasicProtein( "0", "a" );
+            final Protein a_1 = new BasicProtein( "1", "a" );
+            final Protein a_2 = new BasicProtein( "2", "a" );
+            final Protein b_0 = new BasicProtein( "0", "b" );
+            final Protein b_1 = new BasicProtein( "1", "b" );
+            a_0.addProteinDomain( u );
+            a_0.addProteinDomain( v );
+            a_0.addProteinDomain( w );
+            a_1.addProteinDomain( w );
+            a_1.addProteinDomain( x );
+            a_2.addProteinDomain( y );
+            a_2.addProteinDomain( z );
+            b_0.addProteinDomain( u );
+            b_0.addProteinDomain( w );
+            b_1.addProteinDomain( y );
+            b_1.addProteinDomain( z );
+            final List<Protein> protein_list_a = new ArrayList<Protein>();
+            protein_list_a.add( a_0 );
+            protein_list_a.add( a_1 );
+            protein_list_a.add( a_2 );
+            final List<Protein> protein_list_b = new ArrayList<Protein>();
+            protein_list_b.add( b_0 );
+            protein_list_b.add( b_1 );
+            final GenomeWideCombinableDomains ca = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_a, false, new BasicSpecies( "a" ) );
+            final GenomeWideCombinableDomains cb = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_b, true, new BasicSpecies( "b" ) );
+            final DomainArchitectureBasedGenomeSimilarityCalculator calc_u = new DomainArchitectureBasedGenomeSimilarityCalculator( ca,
+                                                                                                                                    cb );
+            calc_u.setAllowDomainsToBeIgnored( true );
+            if ( calc_u.getAllDomains().size() != 6 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome0().size() != 2 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome1().size() != 0 ) {
+                return false;
+            }
+            if ( !calc_u.getDomainsSpecificToGenome0().contains( v.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getDomainsSpecificToGenome0().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getSharedDomains().size() != 4 ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( u.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( w.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( y.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( z.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getAllDomains().size() != 6 ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( u.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( w.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( y.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( z.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( v.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0().size() != 3 ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome1().size() != 0 ) {
+                return false;
+            }
+            if ( calc_u.getSharedBinaryDomainCombinations().size() != 2 ) {
+                return false;
+            }
+            if ( calc_u.getAllBinaryDomainCombinations().size() != 5 ) {
+                return false;
+            }
+            if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0()
+                    .contains( new BasicBinaryDomainCombination( "v", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0()
+                    .contains( new BasicBinaryDomainCombination( "w", "v" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0()
+                    .contains( new BasicBinaryDomainCombination( "w", "x" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "z", "y" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "v", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "v" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "x" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "z", "y" ) ) ) {
+                return false;
+            }
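+            // Ignore domain "u" (plus two ids absent from both genomes): "u" and its
+            // combinations must no longer appear in any of the sets.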
+            calc_u.setAllowDomainsToBeIgnored( true );
+            calc_u.addDomainIdToIgnore( u.getDomainId() );
+            calc_u.addDomainIdToIgnore( new DomainId( "other" ) );
+            calc_u.addDomainIdToIgnore( new DomainId( "other_too" ) );
+            if ( calc_u.getAllDomains().size() != 5 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome0().size() != 2 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome1().size() != 0 ) {
+                return false;
+            }
+            if ( !calc_u.getDomainsSpecificToGenome0().contains( v.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getDomainsSpecificToGenome0().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getSharedDomains().size() != 3 ) {
+                return false;
+            }
+            if ( calc_u.getSharedDomains().contains( u.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( w.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( y.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( z.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getAllDomains().size() != 5 ) {
+                return false;
+            }
+            if ( calc_u.getAllDomains().contains( u.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( w.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( y.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( z.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( v.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0().size() != 2 ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome1().size() != 0 ) {
+                return false;
+            }
+            if ( calc_u.getSharedBinaryDomainCombinations().size() != 1 ) {
+                return false;
+            }
+            if ( calc_u.getAllBinaryDomainCombinations().size() != 3 ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0()
+                    .contains( new BasicBinaryDomainCombination( "v", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0()
+                    .contains( new BasicBinaryDomainCombination( "w", "v" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getBinaryDomainCombinationsSpecificToGenome0()
+                    .contains( new BasicBinaryDomainCombination( "w", "x" ) ) ) {
+                return false;
+            }
+            if ( calc_u.getSharedBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "z", "y" ) ) ) {
+                return false;
+            }
+            if ( calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "v", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "v" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "x" ) ) ) {
+                return false;
+            }
+            if ( calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "w", "u" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "z", "y" ) ) ) {
+                return false;
+            }
+            calc_u.setAllowDomainsToBeIgnored( false );
+            if ( calc_u.getAllDomains().size() != 6 ) {
+                return false;
+            }
+            //------------
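+            // Reset the ignore list and ignore "v" and "w" instead (plus two ids absent
+            // from both genomes); "u" is counted again.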
+            calc_u.setAllowDomainsToBeIgnored( true );
+            calc_u.deleteAllDomainIdsToIgnore();
+            calc_u.addDomainIdToIgnore( new DomainId( "v" ) );
+            calc_u.addDomainIdToIgnore( new DomainId( "w" ) );
+            calc_u.addDomainIdToIgnore( new DomainId( "other" ) );
+            calc_u.addDomainIdToIgnore( new DomainId( "other_too" ) );
+            if ( calc_u.getAllDomains().size() != 4 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome0().size() != 1 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome1().size() != 0 ) {
+                return false;
+            }
+            if ( calc_u.getDomainsSpecificToGenome0().contains( v.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getDomainsSpecificToGenome0().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getSharedDomains().size() != 3 ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( u.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getSharedDomains().contains( w.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( y.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getSharedDomains().contains( z.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getAllDomains().size() != 4 ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( u.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getAllDomains().contains( w.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( y.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( z.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getAllDomains().contains( v.getDomainId() ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllDomains().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome0().size() != 0 ) {
+                return false;
+            }
+            if ( calc_u.getBinaryDomainCombinationsSpecificToGenome1().size() != 0 ) {
+                return false;
+            }
+            if ( calc_u.getSharedBinaryDomainCombinations().size() != 1 ) {
+                return false;
+            }
+            if ( calc_u.getAllBinaryDomainCombinations().size() != 1 ) {
+                return false;
+            }
+            if ( !calc_u.getSharedBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "y", "z" ) ) ) {
+                return false;
+            }
+            if ( !calc_u.getAllBinaryDomainCombinations().contains( new BasicBinaryDomainCombination( "z", "y" ) ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(),
+                           1.0 - ( 1.0 - 1.0 ) / 1.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( 4.0 - 3.0 ) / 4.0 ) ) {
+                return false;
+            }
+            calc_u.setAllowDomainsToBeIgnored( false );
+            if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(),
+                           1.0 - ( 5.0 - 2.0 ) / 5.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( 6.0 - 4.0 ) / 6.0 ) ) {
+                return false;
+            }
+            calc_u.setAllowDomainsToBeIgnored( true );
+            if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(),
+                           1.0 - ( 1.0 - 1.0 ) / 1.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( 4.0 - 3.0 ) / 4.0 ) ) {
+                return false;
+            }
+            calc_u.deleteAllDomainIdsToIgnore();
+            if ( !isEqual( calc_u.calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore(),
+                           1.0 - ( 5.0 - 2.0 ) / 5.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( calc_u.calculateSharedDomainsBasedGenomeSimilarityScore(), 1.0 - ( 6.0 - 4.0 ) / 6.0 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
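+    // Parses test_dir/hmmpfam_output2 and verifies, per key domain, the key domain
+    // counts, the number of proteins containing it, and the combinable-domain counts,
+    // first counting combinations of a domain with itself and then ignoring them.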
+    private static boolean testDomainCombinationCounting( final File test_dir ) {
+        try {
+            final HmmPfamOutputParser parser = new HmmPfamOutputParser( new File( test_dir
+                    + ForesterUtil.getFileSeparator() + "hmmpfam_output2" ), "human", "ls" );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            final List<Protein> domain_collections = parser.parse();
+            final BasicGenomeWideCombinableDomains cdcc = BasicGenomeWideCombinableDomains
+                    .createInstance( domain_collections, false, new BasicSpecies( "human" ) );
+            CombinableDomains cd = cdcc.get( new DomainId( "A" ) );
+            if ( cd.getKeyDomainCount() != 9 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 7 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 11 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 6 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "U" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "V" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "W" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "X" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Y" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Z" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 9 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "B" ) );
+            if ( cd.getKeyDomainCount() != 12 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 7 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 11 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 6 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "U" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "V" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "W" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "X" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Y" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Z" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 12 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "C" ) );
+            if ( cd.getKeyDomainCount() != 10 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 7 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 11 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "U" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "V" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "W" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "X" ).getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Y" ).getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "Z" ).getDomainId() ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "D" ) );
+            if ( cd.getKeyDomainCount() != 15 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 6 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 11 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "E" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 1 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "U" ) );
+            if ( cd.getNumberOfCombinableDomains() != 11 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 6 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 3 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "V" ) );
+            if ( cd.getNumberOfCombinableDomains() != 11 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 3 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 2 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "W" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 2 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 2 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "X" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 2 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "Y" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "Z" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "NN" ) );
+            if ( cd.getKeyDomainCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "NN" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "MM" ) );
+            if ( cd.getNumberOfCombinableDomains() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "MM" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "OO" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "OO" ) );
+            if ( cd.getNumberOfCombinableDomains() != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "OO" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "MM" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "QQ" ) );
+            if ( cd.getNumberOfCombinableDomains() != 1 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 17 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "QQ" ).getDomainId() ) != 3 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "PP" ) );
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 2 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 2 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "singlet" ) );
+            if ( cd.getKeyDomainCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "singlet" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            cd = cdcc.get( new DomainId( "three" ) );
+            if ( cd.getKeyDomainCount() != 3 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "three" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "so_far_so_bad" ) ) != 0 ) {
+                return false;
+            }
+            // Repeat the counting, this time ignoring combinations of a domain with itself:
+            final BasicGenomeWideCombinableDomains cdcc2 = BasicGenomeWideCombinableDomains
+                    .createInstance( domain_collections,
+                                     true,
+                                     new BasicSpecies( "human" ),
+                                     null,
+                                     DomainCombinationType.BASIC );
+            cd = cdcc2.get( new DomainId( "A" ) );
+            if ( cd.getKeyDomainCount() != 9 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 7 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "A" ).getDomainId() ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "B" ).getDomainId() ) != 6 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "C" ).getDomainId() ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "D" ).getDomainId() ) != 3 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new SimpleDomain( "E" ).getDomainId() ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "U" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "V" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "W" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "X" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "Y" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "Z" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "NN" ) ) != 0 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "B" ) );
+            if ( cd.getKeyDomainCount() != 12 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 7 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "A" ) ) != 6 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "B" ) ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "C" ) ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "D" ) ) != 3 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "E" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "U" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "V" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "W" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "X" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "Y" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "Z" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "NN" ) ) != 0 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "C" ) );
+            if ( cd.getKeyDomainCount() != 10 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 7 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "A" ) ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "B" ) ) != 4 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "C" ) ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "D" ) ) != 3 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "E" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "U" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "V" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "W" ) ) != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "X" ) ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "Y" ) ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "Z" ) ) != 2 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "NN" ) ) != 0 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "D" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "E" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainCount() != 1 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "U" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "V" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "W" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "X" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "Y" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "Z" ) );
+            if ( cd.getNumberOfCombinableDomains() != 10 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "NN" ) );
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "NN" ) ) != 0 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "MM" ) );
+            if ( cd.getNumberOfCombinableDomains() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "MM" ) ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "OO" ) ) != 1 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "OO" ) );
+            if ( cd.getNumberOfCombinableDomains() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "OO" ) ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "MM" ) ) != 1 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "QQ" ) );
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "QQ" ) ) != 0 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "singlet" ) );
+            if ( cd.getKeyDomainCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "singlet" ) ) != 0 ) {
+                return false;
+            }
+            cd = cdcc2.get( new DomainId( "three" ) );
+            if ( cd.getKeyDomainCount() != 3 ) {
+                return false;
+            }
+            if ( cd.getKeyDomainProteinsCount() != 1 ) {
+                return false;
+            }
+            if ( cd.getNumberOfCombinableDomains() != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "three" ) ) != 0 ) {
+                return false;
+            }
+            if ( cd.getNumberOfProteinsExhibitingCombination( new DomainId( "so_far_so_bad" ) ) != 0 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
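+    // Checks DomainId semantics: equals/hashCode are case sensitive, compareTo is
+    // case insensitive, and ids are trimmed of surrounding whitespace.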
+    private static boolean testDomainId() {
+        try {
+            final DomainId id1 = new DomainId( "a" );
+            final DomainId id2 = new DomainId( "a" );
+            final DomainId id3 = new DomainId( "A" );
+            final DomainId id4 = new DomainId( "b" );
+            if ( !id1.equals( id1 ) ) {
+                return false;
+            }
+            if ( id1.getId().equals( "x" ) ) {
+                return false;
+            }
+            if ( id1.getId().equals( null ) ) {
+                return false;
+            }
+            if ( !id1.equals( id2 ) ) {
+                return false;
+            }
+            if ( id1.equals( id3 ) ) {
+                return false;
+            }
+            if ( id1.hashCode() != id1.hashCode() ) {
+                return false;
+            }
+            if ( id1.hashCode() != id2.hashCode() ) {
+                return false;
+            }
+            if ( id1.hashCode() == id3.hashCode() ) {
+                return false;
+            }
+            if ( id1.compareTo( id1 ) != 0 ) {
+                return false;
+            }
+            if ( id1.compareTo( id2 ) != 0 ) {
+                return false;
+            }
+            if ( id1.compareTo( id3 ) != 0 ) {
+                return false;
+            }
+            if ( id1.compareTo( id4 ) >= 0 ) {
+                return false;
+            }
+            if ( id4.compareTo( id1 ) <= 0 ) {
+                return false;
+            }
+            if ( !id4.getId().equals( "b" ) ) {
+                return false;
+            }
+            final DomainId id5 = new DomainId( " C " );
+            if ( !id5.getId().equals( "C" ) ) {
+                return false;
+            }
+            if ( id5.equals( id1 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
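+    // Builds a protein with its domains added in scrambled order and checks that
+    // sortDomainsWithAscendingConfidenceValues returns them ordered by increasing
+    // confidence value, with equal domains kept in a stable order.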
+    private static boolean testDomainSorting() {
+        try {
+            final Domain A = new BasicDomain( "A", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.1, -12 );
+            final Domain B = new BasicDomain( "B", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.1, -12 );
+            final Domain C = new BasicDomain( "C", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.2, -12 );
+            final Domain D = new BasicDomain( "D", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.3, -12 );
+            final Domain E = new BasicDomain( "E", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.4, -12 );
+            final Domain F = new BasicDomain( "F", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.5, -12 );
+            final Domain G = new BasicDomain( "G", ( short ) 1, ( short ) 2, ( short ) 1, ( short ) 1, 0.6, -12 );
+            final Domain H1 = new BasicDomain( "H", ( short ) 100, ( short ) 200, ( short ) 1, ( short ) 5, 0.7, -12 );
+            final Domain H2 = new BasicDomain( "H", ( short ) 300, ( short ) 400, ( short ) 2, ( short ) 5, 0.7, -12 );
+            final Domain H3 = new BasicDomain( "H", ( short ) 500, ( short ) 600, ( short ) 3, ( short ) 5, 0.7, -12 );
+            final Domain H4 = new BasicDomain( "H", ( short ) 700, ( short ) 800, ( short ) 4, ( short ) 5, 0.7, -12 );
+            final Domain H5 = new BasicDomain( "H", ( short ) 700, ( short ) 800, ( short ) 5, ( short ) 5, 0.7, -12 );
+            final Domain H6 = new BasicDomain( "H",
+                                               ( short ) 1199,
+                                               ( short ) 1299,
+                                               ( short ) 6,
+                                               ( short ) 6,
+                                               0.7,
+                                               -0.111 );
+            final Domain H7 = new BasicDomain( "H7", ( short ) 700, ( short ) 800, ( short ) 5, ( short ) 5, 0.7, -12 );
+            final Domain H8 = new BasicDomain( "H7", ( short ) 700, ( short ) 800, ( short ) 5, ( short ) 200, 0.7, -12 );
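+            // H7 and H8 share id, coordinates, and e-value and differ only in their
+            // total count, so the stability check at the end can tell them apart.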
+            final Protein protein = new BasicProtein( "00", "bat" );
+            protein.addProteinDomain( H5 );
+            protein.addProteinDomain( H2 );
+            protein.addProteinDomain( H7 );
+            protein.addProteinDomain( H6 );
+            protein.addProteinDomain( A );
+            protein.addProteinDomain( G );
+            protein.addProteinDomain( H4 );
+            protein.addProteinDomain( D );
+            protein.addProteinDomain( H1 );
+            protein.addProteinDomain( C );
+            protein.addProteinDomain( E );
+            protein.addProteinDomain( F );
+            protein.addProteinDomain( B );
+            protein.addProteinDomain( H3 );
+            protein.addProteinDomain( H7 );
+            protein.addProteinDomain( H7 );
+            protein.addProteinDomain( H8 );
+            final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
+            if ( sorted.size() != 17 ) {
+                return false;
+            }
+            if ( !sorted.get( 0 ).getDomainId().getId().equals( "A" ) ) {
+                return false;
+            }
+            if ( sorted.get( 0 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 1 ).getDomainId().getId().equals( "B" ) ) {
+                return false;
+            }
+            if ( sorted.get( 1 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 2 ).getDomainId().getId().equals( "C" ) ) {
+                return false;
+            }
+            if ( sorted.get( 2 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 3 ).getDomainId().getId().equals( "D" ) ) {
+                return false;
+            }
+            if ( sorted.get( 3 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 4 ).getDomainId().getId().equals( "E" ) ) {
+                return false;
+            }
+            if ( sorted.get( 4 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 5 ).getDomainId().getId().equals( "F" ) ) {
+                return false;
+            }
+            if ( sorted.get( 5 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 6 ).getDomainId().getId().equals( "G" ) ) {
+                return false;
+            }
+            if ( sorted.get( 6 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( !sorted.get( 7 ).getDomainId().getId().equals( "H" ) ) {
+                return false;
+            }
+            if ( sorted.get( 7 ).getNumber() != 5 ) {
+                return false;
+            }
+            if ( !sorted.get( 8 ).getDomainId().getId().equals( "H" ) ) {
+                return false;
+            }
+            if ( sorted.get( 8 ).getNumber() != 2 ) {
+                return false;
+            }
+            if ( !sorted.get( 9 ).getDomainId().getId().equals( "H" ) ) {
+                return false;
+            }
+            if ( sorted.get( 9 ).getNumber() != 6 ) {
+                return false;
+            }
+            if ( !sorted.get( 10 ).getDomainId().getId().equals( "H" ) ) {
+                return false;
+            }
+            if ( sorted.get( 10 ).getNumber() != 4 ) {
+                return false;
+            }
+            if ( !sorted.get( 11 ).getDomainId().getId().equals( "H" ) ) {
+                return false;
+            }
+            if ( sorted.get( 11 ).getNumber() != 1 ) {
+                return false;
+            }
+            if ( sorted.get( 11 ).getTotalCount() != 5 ) {
+                return false;
+            }
+            if ( !sorted.get( 12 ).getDomainId().getId().equals( "H" ) ) {
+                return false;
+            }
+            if ( sorted.get( 12 ).getNumber() != 3 ) {
+                return false;
+            }
+            if ( !sorted.get( 13 ).getDomainId().getId().equals( "H7" ) ) {
+                return false;
+            }
+            if ( sorted.get( 13 ).getNumber() != 5 ) {
+                return false;
+            }
+            if ( !sorted.get( 14 ).getDomainId().getId().equals( "H7" ) ) {
+                return false;
+            }
+            if ( sorted.get( 14 ).getNumber() != 5 ) {
+                return false;
+            }
+            if ( !sorted.get( 15 ).getDomainId().getId().equals( "H7" ) ) {
+                return false;
+            }
+            if ( sorted.get( 15 ).getNumber() != 5 ) {
+                return false;
+            }
+            // Check that the sort is stable [as guaranteed by Collections.sort( List )]:
+            if ( !sorted.get( 16 ).getDomainId().getId().equals( "H7" ) ) {
+                return false;
+            }
+            if ( sorted.get( 16 ).getNumber() != 5 ) {
+                return false;
+            }
+            if ( sorted.get( 16 ).getTotalCount() != 200 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
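+    // Checks SurfacingUtil.isEngulfed against a fixed coverage list, then verifies that
+    // removeOverlappingDomains leaves proteins untouched in one mode and, in the other,
+    // drops only domains completely covered by higher-confidence domains.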
+    private static boolean testEngulfingOverlapRemoval() {
+        try {
+            final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final List<Boolean> covered = new ArrayList<Boolean>();
+            covered.add( true ); // 0
+            covered.add( false ); // 1
+            covered.add( true ); // 2
+            covered.add( false ); // 3
+            covered.add( true ); // 4
+            covered.add( true ); // 5
+            covered.add( false ); // 6
+            covered.add( true ); // 7
+            covered.add( true ); // 8
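+            // Covered positions: 0, 2, 4, 5, 7 and 8; a domain should only count as
+            // engulfed if every position it spans is already covered.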
+            if ( SurfacingUtil.isEngulfed( d0, covered ) ) {
+                return false;
+            }
+            if ( SurfacingUtil.isEngulfed( d1, covered ) ) {
+                return false;
+            }
+            if ( SurfacingUtil.isEngulfed( d2, covered ) ) {
+                return false;
+            }
+            if ( !SurfacingUtil.isEngulfed( d3, covered ) ) {
+                return false;
+            }
+            if ( SurfacingUtil.isEngulfed( d4, covered ) ) {
+                return false;
+            }
+            if ( SurfacingUtil.isEngulfed( d5, covered ) ) {
+                return false;
+            }
+            if ( !SurfacingUtil.isEngulfed( d6, covered ) ) {
+                return false;
+            }
+            final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 );
+            final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 );
+            final Protein abc = new BasicProtein( "abc", "nemve" );
+            abc.addProteinDomain( a );
+            abc.addProteinDomain( b );
+            abc.addProteinDomain( c );
+            final Protein abc_r1 = SurfacingUtil.removeOverlappingDomains( 3, false, abc );
+            final Protein abc_r2 = SurfacingUtil.removeOverlappingDomains( 3, true, abc );
+            if ( abc.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( abc_r1.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( abc_r2.getNumberOfProteinDomains() != 2 ) {
+                return false;
+            }
+            if ( !abc_r2.getProteinDomain( 0 ).getDomainId().getId().equals( "a" ) ) {
+                return false;
+            }
+            if ( !abc_r2.getProteinDomain( 1 ).getDomainId().getId().equals( "b" ) ) {
+                return false;
+            }
+            final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 );
+            final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 );
+            final Protein def = new BasicProtein( "def", "nemve" );
+            def.addProteinDomain( d );
+            def.addProteinDomain( e );
+            def.addProteinDomain( f );
+            final Protein def_r1 = SurfacingUtil.removeOverlappingDomains( 5, false, def );
+            final Protein def_r2 = SurfacingUtil.removeOverlappingDomains( 5, true, def );
+            if ( def.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( def_r1.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( def_r2.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( !def_r2.getProteinDomain( 0 ).getDomainId().getId().equals( "d" ) ) {
+                return false;
+            }
+            if ( !def_r2.getProteinDomain( 1 ).getDomainId().getId().equals( "f" ) ) {
+                return false;
+            }
+            if ( !def_r2.getProteinDomain( 2 ).getDomainId().getId().equals( "e" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
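+    // Builds a small set of "eel" proteins and verifies BasicGenomeWideCombinableDomains,
+    // both counting and ignoring combinations of a domain with itself, including the
+    // derived sets of binary domain combinations.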
+    private static boolean testGenomeWideCombinableDomains() {
+        try {
+            final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "b", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "c", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain d = new BasicDomain( "d", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain e = new BasicDomain( "e", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain f = new BasicDomain( "f", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain g = new BasicDomain( "g", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain h = new BasicDomain( "h", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain x = new BasicDomain( "x", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Protein eel_0 = new BasicProtein( "0", "eel" );
+            final Protein eel_1 = new BasicProtein( "1", "eel" );
+            final Protein eel_2 = new BasicProtein( "2", "eel" );
+            final Protein eel_3 = new BasicProtein( "3", "eel" );
+            final Protein eel_4 = new BasicProtein( "4", "eel" );
+            final Protein eel_5 = new BasicProtein( "5", "eel" );
+            final Protein eel_6 = new BasicProtein( "6", "eel" );
+            eel_1.addProteinDomain( a );
+            eel_2.addProteinDomain( a );
+            eel_2.addProteinDomain( b );
+            eel_3.addProteinDomain( a );
+            eel_3.addProteinDomain( a );
+            eel_3.addProteinDomain( b );
+            eel_4.addProteinDomain( a );
+            eel_4.addProteinDomain( b );
+            eel_4.addProteinDomain( c );
+            eel_4.addProteinDomain( d );
+            eel_4.addProteinDomain( e );
+            eel_5.addProteinDomain( e );
+            eel_5.addProteinDomain( e );
+            eel_5.addProteinDomain( f );
+            eel_5.addProteinDomain( f );
+            eel_5.addProteinDomain( f );
+            eel_5.addProteinDomain( f );
+            eel_6.addProteinDomain( g );
+            eel_6.addProteinDomain( h );
+            final List<Protein> protein_list_eel = new ArrayList<Protein>();
+            protein_list_eel.add( eel_0 );
+            protein_list_eel.add( eel_1 );
+            protein_list_eel.add( eel_2 );
+            protein_list_eel.add( eel_3 );
+            protein_list_eel.add( eel_4 );
+            protein_list_eel.add( eel_5 );
+            protein_list_eel.add( eel_6 );
+            final BasicGenomeWideCombinableDomains eel_not_ignore = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_eel, false, new BasicSpecies( "eel" ) );
+            final BasicGenomeWideCombinableDomains eel_ignore = BasicGenomeWideCombinableDomains
+                    .createInstance( protein_list_eel, true, new BasicSpecies( "eel" ) );
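+            // eel_not_ignore counts combinations of a domain with itself (a-a, e-e, f-f),
+            // eel_ignore does not.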
+            if ( !eel_not_ignore.contains( new DomainId( "a" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "b" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "c" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "d" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "e" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "f" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "g" ) ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.contains( new DomainId( "h" ) ) ) {
+                return false;
+            }
+            if ( eel_not_ignore.contains( new DomainId( "x" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "a" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "b" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "c" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "d" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "e" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "f" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "g" ) ) ) {
+                return false;
+            }
+            if ( !eel_ignore.contains( new DomainId( "h" ) ) ) {
+                return false;
+            }
+            if ( eel_ignore.contains( new DomainId( "x" ) ) ) {
+                return false;
+            }
+            if ( eel_not_ignore.getSize() != 8 ) {
+                return false;
+            }
+            if ( eel_ignore.getSize() != 8 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "a" ) ).getCombinableDomainsIds().size() != 5 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "b" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "c" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "d" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "e" ) ).getCombinableDomainsIds().size() != 6 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "f" ) ).getCombinableDomainsIds().size() != 2 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "g" ) ).getCombinableDomainsIds().size() != 1 ) {
+                return false;
+            }
+            if ( eel_not_ignore.get( new DomainId( "h" ) ).getCombinableDomainsIds().size() != 1 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "a" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "b" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "c" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "d" ) ).getCombinableDomainsIds().size() != 4 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "e" ) ).getCombinableDomainsIds().size() != 5 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "f" ) ).getCombinableDomainsIds().size() != 1 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "g" ) ).getCombinableDomainsIds().size() != 1 ) {
+                return false;
+            }
+            if ( eel_ignore.get( new DomainId( "h" ) ).getCombinableDomainsIds().size() != 1 ) {
+                return false;
+            }
+            if ( eel_not_ignore.getAllDomainIds().size() != 8 ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( c.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( e.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( f.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( g.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_not_ignore.getAllDomainIds().contains( h.getDomainId() ) ) {
+                return false;
+            }
+            if ( eel_not_ignore.getAllDomainIds().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            if ( eel_ignore.getAllDomainIds().size() != 8 ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( a.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( b.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( c.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( d.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( e.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( f.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( g.getDomainId() ) ) {
+                return false;
+            }
+            if ( !eel_ignore.getAllDomainIds().contains( h.getDomainId() ) ) {
+                return false;
+            }
+            if ( eel_ignore.getAllDomainIds().contains( x.getDomainId() ) ) {
+                return false;
+            }
+            final SortedSet<BinaryDomainCombination> bc0 = eel_not_ignore.toBinaryDomainCombinations();
+            if ( bc0.size() != 15 ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "a", "a" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "a", "b" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "b", "a" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "a", "c" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "a", "d" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "a", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "b", "c" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "b", "d" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "b", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "c", "d" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "c", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "d", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "e", "f" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "e", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "f", "f" ) ) ) {
+                return false;
+            }
+            if ( !bc0.contains( new BasicBinaryDomainCombination( "g", "h" ) ) ) {
+                return false;
+            }
+            if ( bc0.contains( new BasicBinaryDomainCombination( "f", "a" ) ) ) {
+                return false;
+            }
+            if ( bc0.contains( new BasicBinaryDomainCombination( "f", "b" ) ) ) {
+                return false;
+            }
+            if ( bc0.contains( new BasicBinaryDomainCombination( "a", "h" ) ) ) {
+                return false;
+            }
+            if ( bc0.contains( new BasicBinaryDomainCombination( "a", "g" ) ) ) {
+                return false;
+            }
+            final SortedSet<BinaryDomainCombination> bc1 = eel_ignore.toBinaryDomainCombinations();
+            if ( bc1.size() != 12 ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "a", "a" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "a", "b" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "b", "a" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "a", "c" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "a", "d" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "a", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "b", "c" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "b", "d" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "b", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "c", "d" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "c", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "d", "e" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "e", "f" ) ) ) {
+                return false;
+            }
+            if ( !bc1.contains( new BasicBinaryDomainCombination( "g", "h" ) ) ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "e", "e" ) ) ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "f", "f" ) ) ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "f", "a" ) ) ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "f", "b" ) ) ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "a", "g" ) ) ) {
+                return false;
+            }
+            if ( bc1.contains( new BasicBinaryDomainCombination( "b", "g" ) ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
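+    // Parses the "hmmpfam_output" and "hmmpfam_output_short" test files and verifies the domain
+    // counts, e-value filtering, and the coordinates and scores reported for ENSP00000285681.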
+    private static boolean testHmmPfamOutputParser( final File test_dir ) {
+        try {
+            final HmmPfamOutputParser parser = new HmmPfamOutputParser( new File( test_dir
+                    + ForesterUtil.getFileSeparator() + "hmmpfam_output" ), "human", "ls" );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            final List<?> domain_collections = parser.parse();
+            if ( parser.getDomainsEncountered() != 4 ) {
+                return false;
+            }
+            if ( parser.getDomainsIgnoredDueToDuf() != 0 ) {
+                return false;
+            }
+            if ( parser.getDomainsIgnoredDueToEval() != 1 ) {
+                return false;
+            }
+            if ( parser.getDomainsIgnoredDueToOverlap() != 0 ) {
+                return false;
+            }
+            if ( parser.getDomainsStored() != 3 ) {
+                return false;
+            }
+            if ( domain_collections.size() != 1 ) {
+                return false;
+            }
+            final Protein pdc = ( Protein ) domain_collections.get( 0 );
+            if ( !pdc.getProteinId().equals( new ProteinId( "ENSP00000285681" ) ) ) {
+                return false;
+            }
+            if ( !pdc.getSpecies().getSpeciesId().equals( "human" ) ) {
+                return false;
+            }
+            if ( pdc.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( !pdc.getAccession().equals( "acc_ENSP00000285681" ) ) {
+                return false;
+            }
+            if ( !pdc
+                    .getDescription()
+                    .equals( "pep:known chromosome:NCBI36:21:16024215:16174248:1 gene:ENSG00000155313 transcript:ENST00000285681" ) ) {
+                return false;
+            }
+            final List<Domain> uba = pdc.getProteinDomains( new DomainId( "UBA" ) );
+            final List<Domain> uim = pdc.getProteinDomains( new DomainId( "UIM" ) );
+            final List<Domain> uch = pdc.getProteinDomains( new DomainId( "UCH" ) );
+            if ( uba.size() != 1 ) {
+                return false;
+            }
+            if ( uim.size() != 2 ) {
+                return false;
+            }
+            if ( uch.size() != 0 ) {
+                return false;
+            }
+            final BasicDomain uim_domain = ( BasicDomain ) uim.get( 1 );
+            if ( !uim_domain.getDomainId().equals( new DomainId( "UIM" ) ) ) {
+                return false;
+            }
+            if ( uim_domain.getTotalCount() != 2 ) {
+                return false;
+            }
+            final BasicDomain uba_domain = ( BasicDomain ) uba.get( 0 );
+            if ( !uba_domain.getDomainId().equals( new DomainId( "UBA" ) ) ) {
+                return false;
+            }
+            if ( uba_domain.getNumber() != 1 ) {
+                return false;
+            }
+            if ( uba_domain.getTotalCount() != 1 ) {
+                return false;
+            }
+            if ( uba_domain.getFrom() != 16 ) {
+                return false;
+            }
+            if ( uba_domain.getTo() != 57 ) {
+                return false;
+            }
+            if ( !Test.isEqual( uba_domain.getPerSequenceEvalue(), 0.00084 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( uba_domain.getPerSequenceScore(), 23.2 ) ) {
+                return false;
+            }
+            final HmmPfamOutputParser parser2 = new HmmPfamOutputParser( new File( test_dir
+                    + ForesterUtil.getFileSeparator() + "hmmpfam_output_short" ), "human", "ls" );
+            parser2.setEValueMaximum( 0.2 );
+            parser2.setIgnoreDufs( true );
+            parser2.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            final List<Protein> domain_collections2 = parser2.parse();
+            if ( parser2.getDomainsEncountered() != 4 ) {
+                return false;
+            }
+            if ( parser2.getDomainsIgnoredDueToDuf() != 0 ) {
+                return false;
+            }
+            if ( parser2.getDomainsIgnoredDueToEval() != 1 ) {
+                return false;
+            }
+            if ( parser2.getDomainsIgnoredDueToOverlap() != 0 ) {
+                return false;
+            }
+            if ( parser2.getDomainsStored() != 3 ) {
+                return false;
+            }
+            if ( domain_collections2.size() != 1 ) {
+                return false;
+            }
+            final Protein pdc2 = domain_collections2.get( 0 );
+            if ( !pdc2.getProteinId().getId().equals( "ENSP00000285681" ) ) {
+                return false;
+            }
+            if ( !pdc2.getSpecies().getSpeciesId().equals( "human" ) ) {
+                return false;
+            }
+            if ( !pdc2.getName().equals( "" ) ) {
+                return false;
+            }
+            if ( !pdc2.getAccession().equals( "223" ) ) {
+                return false;
+            }
+            if ( !pdc2
+                    .getDescription()
+                    .equals( "pep:known chromosome:NCBI36:21:16024215:16174248:1 gene:ENSG00000155313 transcript:ENST00000285681" ) ) {
+                return false;
+            }
+            if ( pdc2.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            final List<Domain> uba2 = pdc2.getProteinDomains( new DomainId( "UBA" ) );
+            final List<Domain> uim2 = pdc2.getProteinDomains( new DomainId( "UIM" ) );
+            final List<Domain> uch2 = pdc2.getProteinDomains( new DomainId( "UCH" ) );
+            if ( uba2.size() != 1 ) {
+                return false;
+            }
+            if ( uim2.size() != 2 ) {
+                return false;
+            }
+            if ( uch2.size() != 0 ) {
+                return false;
+            }
+            final BasicDomain uim_domain2 = ( BasicDomain ) uim2.get( 1 );
+            if ( !uim_domain2.getDomainId().getId().equals( "UIM" ) ) {
+                return false;
+            }
+            if ( uim_domain2.getTotalCount() != 2 ) {
+                return false;
+            }
+            final BasicDomain uba_domain2 = ( BasicDomain ) uba2.get( 0 );
+            if ( !uba_domain2.getDomainId().getId().equals( "UBA" ) ) {
+                return false;
+            }
+            if ( uba_domain2.getNumber() != 1 ) {
+                return false;
+            }
+            if ( uba_domain2.getTotalCount() != 1 ) {
+                return false;
+            }
+            if ( uba_domain2.getFrom() != 16 ) {
+                return false;
+            }
+            if ( uba_domain2.getTo() != 57 ) {
+                return false;
+            }
+            if ( !Test.isEqual( uba_domain2.getPerSequenceEvalue(), 0.00084 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
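+    // Exercises the protein- and domain-level filters (POSITIVE_PROTEIN, NEGATIVE_PROTEIN,
+    // NEGATIVE_DOMAIN) of HmmPfamOutputParser on the "hmmpfam_output3" test file.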
+    private static boolean testHmmPfamOutputParserWithFilter( final File test_dir ) {
+        try {
+            HmmPfamOutputParser parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator()
+                    + "hmmpfam_output3" ), "human", "ls" );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            List<Protein> proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 0 ) {
+                return false;
+            }
+            if ( proteins.size() != 4 ) {
+                return false;
+            }
+            // NEGATIVE_PROTEIN filter with domain ids ("beauty", "strange") that match nothing: no proteins ignored
+            Set<DomainId> filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "beauty" ) );
+            filter.add( new DomainId( "strange" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.NEGATIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 0 ) {
+                return false;
+            }
+            if ( proteins.size() != 4 ) {
+                return false;
+            }
+            // POSITIVE_PROTEIN filter with the same non-matching domain ids: all four proteins ignored
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "beauty" ) );
+            filter.add( new DomainId( "strange" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 4 ) {
+                return false;
+            }
+            if ( proteins.size() != 0 ) {
+                return false;
+            }
+            // POSITIVE_PROTEIN filter ("UIM", "A", "C") matching every protein: nothing ignored
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "UIM" ) );
+            filter.add( new DomainId( "A" ) );
+            filter.add( new DomainId( "C" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 0 ) {
+                return false;
+            }
+            if ( proteins.size() != 4 ) {
+                return false;
+            }
+            // NEGATIVE_DOMAIN filter ("UIM", "A", "C", "X"): seven matching domains dropped, three proteins remain
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "UIM" ) );
+            filter.add( new DomainId( "A" ) );
+            filter.add( new DomainId( "C" ) );
+            filter.add( new DomainId( "X" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.NEGATIVE_DOMAIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getDomainsIgnoredDueToNegativeDomainFilter() != 7 ) {
+                return false;
+            }
+            if ( proteins.size() != 3 ) {
+                return false;
+            }
+            // NEGATIVE_PROTEIN filter ("UIM", "A", "C") matching every protein: all four proteins ignored
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "UIM" ) );
+            filter.add( new DomainId( "A" ) );
+            filter.add( new DomainId( "C" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.NEGATIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 4 ) {
+                return false;
+            }
+            if ( proteins.size() != 0 ) {
+                return false;
+            }
+            // NEGATIVE_PROTEIN filter ("UIM"): the single UIM-containing protein is ignored
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "UIM" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.NEGATIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 1 ) {
+                return false;
+            }
+            if ( parser.getProteinsStored() != 3 ) {
+                return false;
+            }
+            if ( proteins.size() != 3 ) {
+                return false;
+            }
+            // POSITIVE_PROTEIN filter ("UIM"): only the UIM-containing protein is kept
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "UIM" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getProteinsIgnoredDueToFilter() != 3 ) {
+                return false;
+            }
+            if ( parser.getProteinsStored() != 1 ) {
+                return false;
+            }
+            if ( proteins.size() != 1 ) {
+                return false;
+            }
+            // POSITIVE_PROTEIN filter ("A", "C"): three proteins match, one is ignored
+            filter = new TreeSet<DomainId>();
+            filter.add( new DomainId( "A" ) );
+            filter.add( new DomainId( "C" ) );
+            parser = new HmmPfamOutputParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmpfam_output3" ),
+                                              "human",
+                                              "ls",
+                                              filter,
+                                              HmmPfamOutputParser.FilterType.POSITIVE_PROTEIN );
+            parser.setEValueMaximum( 0.2 );
+            parser.setIgnoreDufs( true );
+            parser.setReturnType( HmmPfamOutputParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN );
+            proteins = parser.parse();
+            if ( parser.getDomainsEncountered() != 11 ) {
+                return false;
+            }
+            if ( parser.getProteinsEncountered() != 4 ) {
+                return false;
+            }
+            if ( parser.getProteinsIgnoredDueToFilter() != 1 ) {
+                return false;
+            }
+            if ( parser.getProteinsStored() != 3 ) {
+                return false;
+            }
+            if ( proteins.size() != 3 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
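+    // Tests SurfacingUtil.calculateOverlap and SurfacingUtil.removeOverlappingDomains on
+    // hand-built proteins with overlapping domains.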
+    private static boolean testOverlapRemoval() {
+        try {
+            final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 );
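+            // "covered" marks which residue positions (0-8) are already occupied; calculateOverlap
+            // is expected to count how many positions of a domain fall onto covered positions.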
+            final List<Boolean> covered = new ArrayList<Boolean>();
+            covered.add( true ); // 0
+            covered.add( false ); // 1
+            covered.add( true ); // 2
+            covered.add( false ); // 3
+            covered.add( true ); // 4
+            covered.add( true ); // 5
+            covered.add( false ); // 6
+            covered.add( true ); // 7
+            covered.add( true ); // 8
+            if ( SurfacingUtil.calculateOverlap( d0, covered ) != 3 ) {
+                return false;
+            }
+            if ( SurfacingUtil.calculateOverlap( d1, covered ) != 2 ) {
+                return false;
+            }
+            if ( SurfacingUtil.calculateOverlap( d2, covered ) != 6 ) {
+                return false;
+            }
+            if ( SurfacingUtil.calculateOverlap( d3, covered ) != 0 ) {
+                return false;
+            }
+            if ( SurfacingUtil.calculateOverlap( d4, covered ) != 2 ) {
+                return false;
+            }
+            final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.01, 1 );
+            final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+            final Protein ab = new BasicProtein( "ab", "varanus" );
+            ab.addProteinDomain( a );
+            ab.addProteinDomain( b );
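+            // Domains "a" (2-5) and "b" (2-10) overlap over four positions: with a cutoff of 3 only
+            // the better scoring domain ("a") is expected to survive, with a cutoff of 4 both remain.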
+            final Protein ab_s0 = SurfacingUtil.removeOverlappingDomains( 3, false, ab );
+            if ( ab.getNumberOfProteinDomains() != 2 ) {
+                return false;
+            }
+            if ( ab_s0.getNumberOfProteinDomains() != 1 ) {
+                return false;
+            }
+            if ( !ab_s0.getProteinDomain( 0 ).getDomainId().getId().equals( "a" ) ) {
+                return false;
+            }
+            final Protein ab_s1 = SurfacingUtil.removeOverlappingDomains( 4, false, ab );
+            if ( ab.getNumberOfProteinDomains() != 2 ) {
+                return false;
+            }
+            if ( ab_s1.getNumberOfProteinDomains() != 2 ) {
+                return false;
+            }
+            final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 );
+            final Domain d = new BasicDomain( "d",
+                                              ( short ) 10000,
+                                              ( short ) 10500,
+                                              ( short ) 1,
+                                              ( short ) 1,
+                                              0.0000001,
+                                              1 );
+            final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 );
+            final Protein cde = new BasicProtein( "cde", "varanus" );
+            cde.addProteinDomain( c );
+            cde.addProteinDomain( d );
+            cde.addProteinDomain( e );
+            final Protein cde_s0 = SurfacingUtil.removeOverlappingDomains( 0, false, cde );
+            if ( cde.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            if ( cde_s0.getNumberOfProteinDomains() != 3 ) {
+                return false;
+            }
+            final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 );
+            final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 );
+            final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 );
+            final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 );
+            final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 );
+            final Protein fghi = new BasicProtein( "fghi", "varanus" );
+            fghi.addProteinDomain( f );
+            fghi.addProteinDomain( g );
+            fghi.addProteinDomain( h );
+            fghi.addProteinDomain( i );
+            fghi.addProteinDomain( i );
+            fghi.addProteinDomain( i );
+            fghi.addProteinDomain( i2 );
+            final Protein fghi_s0 = SurfacingUtil.removeOverlappingDomains( 10, false, fghi );
+            if ( fghi.getNumberOfProteinDomains() != 7 ) {
+                return false;
+            }
+            if ( fghi_s0.getNumberOfProteinDomains() != 1 ) {
+                return false;
+            }
+            if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().getId().equals( "h" ) ) {
+                return false;
+            }
+            final Protein fghi_s1 = SurfacingUtil.removeOverlappingDomains( 11, false, fghi );
+            if ( fghi.getNumberOfProteinDomains() != 7 ) {
+                return false;
+            }
+            if ( fghi_s1.getNumberOfProteinDomains() != 7 ) {
+                return false;
+            }
+            final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 );
+            final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 );
+            final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 );
+            final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 );
+            final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 );
+            final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 );
+            final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 );
+            final Protein jklm = new BasicProtein( "jklm", "varanus" );
+            jklm.addProteinDomain( j );
+            jklm.addProteinDomain( k );
+            jklm.addProteinDomain( l );
+            jklm.addProteinDomain( m );
+            jklm.addProteinDomain( m0 );
+            jklm.addProteinDomain( m1 );
+            jklm.addProteinDomain( m2 );
+            final Protein jklm_s0 = SurfacingUtil.removeOverlappingDomains( 10, false, jklm );
+            if ( jklm.getNumberOfProteinDomains() != 7 ) {
+                return false;
+            }
+            if ( jklm_s0.getNumberOfProteinDomains() != 1 ) {
+                return false;
+            }
+            if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().getId().equals( "l" ) ) {
+                return false;
+            }
+            final Protein jklm_s1 = SurfacingUtil.removeOverlappingDomains( 11, false, jklm );
+            if ( jklm.getNumberOfProteinDomains() != 7 ) {
+                return false;
+            }
+            if ( jklm_s1.getNumberOfProteinDomains() != 7 ) {
+                return false;
+            }
+            final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 );
+            final Protein od = new BasicProtein( "od", "varanus" );
+            od.addProteinDomain( only );
+            final Protein od_s0 = SurfacingUtil.removeOverlappingDomains( 0, false, od );
+            if ( od.getNumberOfProteinDomains() != 1 ) {
+                return false;
+            }
+            if ( od_s0.getNumberOfProteinDomains() != 1 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
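+    // Builds four small "genomes" (one to four) from hand-made proteins; the comment blocks below
+    // list, per genome, the domain architectures, the derived binary domain combinations, and the
+    // plain domain content. Checks the presence/absence matrices and Dollo parsimony gain/loss states.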
+    private static boolean testParsimony() {
+        try {
+            final BinaryStates X = BinaryStates.PRESENT;
+            final BinaryStates O = BinaryStates.ABSENT;
+            final GainLossStates G = GainLossStates.GAIN;
+            final GainLossStates L = GainLossStates.LOSS;
+            final GainLossStates A = GainLossStates.UNCHANGED_ABSENT;
+            final GainLossStates P = GainLossStates.UNCHANGED_PRESENT;
+            final Domain a = new BasicDomain( "A", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "B", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "C", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain d = new BasicDomain( "D", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain e = new BasicDomain( "E", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain f = new BasicDomain( "F", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain g = new BasicDomain( "G", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain h = new BasicDomain( "H", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain i = new BasicDomain( "I", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain j = new BasicDomain( "J", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain l = new BasicDomain( "L", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain m = new BasicDomain( "M", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain n = new BasicDomain( "N", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain o = new BasicDomain( "O", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain p = new BasicDomain( "P", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain q = new BasicDomain( "Q", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain r = new BasicDomain( "R", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            // 1 a-a a-b a-c e-f-g-h l-m
+            // 2 a-b a-c e-f-g-i n-o
+            // 3 a-b a-d e-f-g-j p-q
+            // 4 a-b a-d p-r
+            // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m
+            // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o
+            // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q
+            // 4 a-b a-d p-r
+            // 1 a b c e f g h l m
+            // 2 a b c e f g i n o
+            // 3 a b d e f g j p q
+            // 4 a b d p r
+            final Protein aa1 = new BasicProtein( "aa1", "one" );
+            aa1.addProteinDomain( a );
+            aa1.addProteinDomain( a );
+            final Protein ab1 = new BasicProtein( "ab1", "one" );
+            ab1.addProteinDomain( a );
+            ab1.addProteinDomain( b );
+            final Protein ac1 = new BasicProtein( "ac1", "one" );
+            ac1.addProteinDomain( a );
+            ac1.addProteinDomain( c );
+            final Protein efgh1 = new BasicProtein( "efgh1", "one" );
+            efgh1.addProteinDomain( e );
+            efgh1.addProteinDomain( f );
+            efgh1.addProteinDomain( g );
+            efgh1.addProteinDomain( h );
+            final Protein lm1 = new BasicProtein( "lm1", "one" );
+            lm1.addProteinDomain( l );
+            lm1.addProteinDomain( m );
+            final Protein ab2 = new BasicProtein( "ab2", "two" );
+            ab2.addProteinDomain( a );
+            ab2.addProteinDomain( b );
+            final Protein ac2 = new BasicProtein( "ac2", "two" );
+            ac2.addProteinDomain( a );
+            ac2.addProteinDomain( c );
+            final Protein efgi2 = new BasicProtein( "efgi2", "two" );
+            efgi2.addProteinDomain( e );
+            efgi2.addProteinDomain( f );
+            efgi2.addProteinDomain( g );
+            efgi2.addProteinDomain( i );
+            final Protein no2 = new BasicProtein( "no2", "two" );
+            no2.addProteinDomain( n );
+            no2.addProteinDomain( o );
+            final Protein ab3 = new BasicProtein( "ab3", "three" );
+            ab3.addProteinDomain( a );
+            ab3.addProteinDomain( b );
+            final Protein ad3 = new BasicProtein( "ad3", "three" );
+            ad3.addProteinDomain( a );
+            ad3.addProteinDomain( d );
+            final Protein efgj3 = new BasicProtein( "efgj3", "three" );
+            efgj3.addProteinDomain( e );
+            efgj3.addProteinDomain( f );
+            efgj3.addProteinDomain( g );
+            efgj3.addProteinDomain( j );
+            final Protein pq3 = new BasicProtein( "pq3", "three" );
+            pq3.addProteinDomain( p );
+            pq3.addProteinDomain( q );
+            final Protein ab4 = new BasicProtein( "ab4", "four" );
+            ab4.addProteinDomain( a );
+            ab4.addProteinDomain( b );
+            final Protein ad4 = new BasicProtein( "ad4", "four" );
+            ad4.addProteinDomain( a );
+            ad4.addProteinDomain( d );
+            final Protein pr4 = new BasicProtein( "pr4", "four" );
+            pr4.addProteinDomain( p );
+            pr4.addProteinDomain( r );
+            final List<Protein> one_list = new ArrayList<Protein>();
+            one_list.add( aa1 );
+            one_list.add( ab1 );
+            one_list.add( ac1 );
+            one_list.add( efgh1 );
+            one_list.add( lm1 );
+            final List<Protein> two_list = new ArrayList<Protein>();
+            two_list.add( ab2 );
+            two_list.add( ac2 );
+            two_list.add( efgi2 );
+            two_list.add( no2 );
+            final List<Protein> three_list = new ArrayList<Protein>();
+            three_list.add( ab3 );
+            three_list.add( ad3 );
+            three_list.add( efgj3 );
+            three_list.add( pq3 );
+            final List<Protein> four_list = new ArrayList<Protein>();
+            four_list.add( ab4 );
+            four_list.add( ad4 );
+            four_list.add( pr4 );
+            final GenomeWideCombinableDomains one = BasicGenomeWideCombinableDomains
+                    .createInstance( one_list, false, new BasicSpecies( "one" ) );
+            final GenomeWideCombinableDomains two = BasicGenomeWideCombinableDomains
+                    .createInstance( two_list, false, new BasicSpecies( "two" ) );
+            final GenomeWideCombinableDomains three = BasicGenomeWideCombinableDomains
+                    .createInstance( three_list, false, new BasicSpecies( "three" ) );
+            final GenomeWideCombinableDomains four = BasicGenomeWideCombinableDomains
+                    .createInstance( four_list, false, new BasicSpecies( "four" ) );
+            final List<GenomeWideCombinableDomains> gwcd_list = new ArrayList<GenomeWideCombinableDomains>();
+            gwcd_list.add( one );
+            gwcd_list.add( two );
+            gwcd_list.add( three );
+            gwcd_list.add( four );
+            final CharacterStateMatrix<BinaryStates> matrix_d = DomainParsimonyCalculator
+                    .createMatrixOfDomainPresenceOrAbsence( gwcd_list );
+            final CharacterStateMatrix<BinaryStates> matrix_bc = DomainParsimonyCalculator
+                    .createMatrixOfBinaryDomainCombinationPresenceOrAbsence( gwcd_list );
+            // 1 a b c e f g h l m
+            // 2 a b c e f g i n o
+            // 3 a b d e f g j p q
+            // 4 a b d p r
+            if ( matrix_d.getState( 0, 0 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 1 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 2 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 3 ) != O ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 4 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 5 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 6 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 7 ) != X ) {
+                return false;
+            }
+            if ( matrix_d.getState( 0, 8 ) != O ) {
+                return false;
+            }
+            // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m
+            // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o
+            // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q
+            // 4 a-b a-d p-r
+            if ( matrix_bc.getState( 0, 0 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 0, 1 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 0, 2 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 0, 3 ) != O ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 0, 4 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 1, 0 ) != O ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 1, 1 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 1, 2 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 1, 3 ) != O ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 1, 4 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 2, 0 ) != O ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 2, 1 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 2, 2 ) != O ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 2, 3 ) != X ) {
+                return false;
+            }
+            if ( matrix_bc.getState( 2, 4 ) != X ) {
+                return false;
+            }
+            final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance();
+            final String p0_str = "((one,two)1-2,(three,four)3-4)root";
+            final Phylogeny p0 = factory0.create( p0_str, new NHXParser() )[ 0 ];
+            final DomainParsimonyCalculator dp0 = DomainParsimonyCalculator.createInstance( p0, gwcd_list );
+            dp0.executeDolloParsimonyOnDomainPresence();
+            final CharacterStateMatrix<GainLossStates> gl_matrix_d = dp0.getGainLossMatrix();
+            final CharacterStateMatrix<BinaryStates> is_matrix_d = dp0.getInternalStatesMatrix();
+            dp0.executeDolloParsimonyOnBinaryDomainCombintionPresence();
+            final CharacterStateMatrix<GainLossStates> gl_matrix_bc = dp0.getGainLossMatrix();
+            final CharacterStateMatrix<BinaryStates> is_matrix_bc = dp0.getInternalStatesMatrix();
+            if ( is_matrix_d.getState( "root", "A" ) != X ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "B" ) != X ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "C" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "D" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "E" ) != X ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "root", "A=A" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "root", "A=B" ) != X ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "root", "A=C" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "root", "A=D" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "root", "G=H" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "1-2", "G=H" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_bc.getState( "root", "E=F" ) != X ) {
+                return false;
+            }
+            if ( gl_matrix_bc.getState( "root", "E=F" ) != P ) {
+                return false;
+            }
+            if ( gl_matrix_bc.getState( "root", "A=A" ) != A ) {
+                return false;
+            }
+            if ( gl_matrix_bc.getState( "one", "A=A" ) != G ) {
+                return false;
+            }
+            if ( gl_matrix_bc.getState( "root", "A=B" ) != P ) {
+                return false;
+            }
+            if ( gl_matrix_bc.getState( "3-4", "A=D" ) != G ) {
+                return false;
+            }
+            if ( gl_matrix_bc.getState( "four", "E=F" ) != L ) {
+                return false;
+            }
+            if ( gl_matrix_d.getState( "3-4", "P" ) != G ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
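+    // Same four "genomes" as in testParsimony, but each domain id is mapped to a secondary feature
+    // (AAA, BBB, ...) and Dollo parsimony is run on the secondary feature presence/absence matrix.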
+    private static boolean testParsimonyOnSecondaryFeatures() {
+        try {
+            final BinaryStates X = BinaryStates.PRESENT;
+            final BinaryStates O = BinaryStates.ABSENT;
+            final GainLossStates G = GainLossStates.GAIN;
+            final GainLossStates L = GainLossStates.LOSS;
+            final GainLossStates A = GainLossStates.UNCHANGED_ABSENT;
+            final GainLossStates P = GainLossStates.UNCHANGED_PRESENT;
+            final Domain a = new BasicDomain( "A", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain b = new BasicDomain( "B", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain c = new BasicDomain( "C", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain d = new BasicDomain( "D", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain e = new BasicDomain( "E", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain f = new BasicDomain( "F", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain g = new BasicDomain( "G", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain h = new BasicDomain( "H", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain i = new BasicDomain( "I", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain j = new BasicDomain( "J", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain l = new BasicDomain( "L", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain m = new BasicDomain( "M", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain n = new BasicDomain( "N", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain o = new BasicDomain( "O", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain p = new BasicDomain( "P", 1, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain q = new BasicDomain( "Q", 2, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            final Domain r = new BasicDomain( "R", 3, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
+            // 1 a-a a-b a-c e-f-g-h l-m
+            // 2 a-b a-c e-f-g-i n-o
+            // 3 a-b a-d e-f-g-j p-q
+            // 4 a-b a-d p-r
+            // 1 a-a a-b a-c e-f e-g e-h f-g f-h g-h l-m
+            // 2 a-b a-c e-f e-g e-i f-g f-i g-i n-o
+            // 3 a-b a-d e-f e-g e-j f-g f-j g-j p-q
+            // 4 a-b a-d p-r
+            // 1 a b c e f g h l m
+            // 2 a b c e f g i n o
+            // 3 a b d e f g j p q
+            // 4 a b d p r
+            final Protein aa1 = new BasicProtein( "aa1", "one" );
+            aa1.addProteinDomain( a );
+            aa1.addProteinDomain( a );
+            final Protein ab1 = new BasicProtein( "ab1", "one" );
+            ab1.addProteinDomain( a );
+            ab1.addProteinDomain( b );
+            final Protein ac1 = new BasicProtein( "ac1", "one" );
+            ac1.addProteinDomain( a );
+            ac1.addProteinDomain( c );
+            final Protein efgh1 = new BasicProtein( "efgh1", "one" );
+            efgh1.addProteinDomain( e );
+            efgh1.addProteinDomain( f );
+            efgh1.addProteinDomain( g );
+            efgh1.addProteinDomain( h );
+            final Protein lm1 = new BasicProtein( "lm1", "one" );
+            lm1.addProteinDomain( l );
+            lm1.addProteinDomain( m );
+            final Protein ab2 = new BasicProtein( "ab2", "two" );
+            ab2.addProteinDomain( a );
+            ab2.addProteinDomain( b );
+            final Protein ac2 = new BasicProtein( "ac2", "two" );
+            ac2.addProteinDomain( a );
+            ac2.addProteinDomain( c );
+            final Protein efgi2 = new BasicProtein( "efgi2", "two" );
+            efgi2.addProteinDomain( e );
+            efgi2.addProteinDomain( f );
+            efgi2.addProteinDomain( g );
+            efgi2.addProteinDomain( i );
+            final Protein no2 = new BasicProtein( "no2", "two" );
+            no2.addProteinDomain( n );
+            no2.addProteinDomain( o );
+            final Protein ab3 = new BasicProtein( "ab3", "three" );
+            ab3.addProteinDomain( a );
+            ab3.addProteinDomain( b );
+            final Protein ad3 = new BasicProtein( "ad3", "three" );
+            ad3.addProteinDomain( a );
+            ad3.addProteinDomain( d );
+            final Protein efgj3 = new BasicProtein( "efgj3", "three" );
+            efgj3.addProteinDomain( e );
+            efgj3.addProteinDomain( f );
+            efgj3.addProteinDomain( g );
+            efgj3.addProteinDomain( j );
+            final Protein pq3 = new BasicProtein( "pq3", "three" );
+            pq3.addProteinDomain( p );
+            pq3.addProteinDomain( q );
+            final Protein ab4 = new BasicProtein( "ab4", "four" );
+            ab4.addProteinDomain( a );
+            ab4.addProteinDomain( b );
+            final Protein ad4 = new BasicProtein( "ad4", "four" );
+            ad4.addProteinDomain( a );
+            ad4.addProteinDomain( d );
+            final Protein pr4 = new BasicProtein( "pr4", "four" );
+            pr4.addProteinDomain( p );
+            pr4.addProteinDomain( r );
+            final List<Protein> one_list = new ArrayList<Protein>();
+            one_list.add( aa1 );
+            one_list.add( ab1 );
+            one_list.add( ac1 );
+            one_list.add( efgh1 );
+            one_list.add( lm1 );
+            final List<Protein> two_list = new ArrayList<Protein>();
+            two_list.add( ab2 );
+            two_list.add( ac2 );
+            two_list.add( efgi2 );
+            two_list.add( no2 );
+            final List<Protein> three_list = new ArrayList<Protein>();
+            three_list.add( ab3 );
+            three_list.add( ad3 );
+            three_list.add( efgj3 );
+            three_list.add( pq3 );
+            final List<Protein> four_list = new ArrayList<Protein>();
+            four_list.add( ab4 );
+            four_list.add( ad4 );
+            four_list.add( pr4 );
+            final GenomeWideCombinableDomains one = BasicGenomeWideCombinableDomains
+                    .createInstance( one_list, false, new BasicSpecies( "one" ) );
+            final GenomeWideCombinableDomains two = BasicGenomeWideCombinableDomains
+                    .createInstance( two_list, false, new BasicSpecies( "two" ) );
+            final GenomeWideCombinableDomains three = BasicGenomeWideCombinableDomains
+                    .createInstance( three_list, false, new BasicSpecies( "three" ) );
+            final GenomeWideCombinableDomains four = BasicGenomeWideCombinableDomains
+                    .createInstance( four_list, false, new BasicSpecies( "four" ) );
+            final List<GenomeWideCombinableDomains> gwcd_list = new ArrayList<GenomeWideCombinableDomains>();
+            gwcd_list.add( one );
+            gwcd_list.add( two );
+            gwcd_list.add( three );
+            gwcd_list.add( four );
+            final Map<DomainId, Set<String>> map_same = new HashMap<DomainId, Set<String>>();
+            final HashSet<String> a_s = new HashSet<String>();
+            a_s.add( "AAA" );
+            final HashSet<String> b_s = new HashSet<String>();
+            b_s.add( "BBB" );
+            final HashSet<String> c_s = new HashSet<String>();
+            c_s.add( "CCC" );
+            final HashSet<String> d_s = new HashSet<String>();
+            d_s.add( "DDD" );
+            final HashSet<String> e_s = new HashSet<String>();
+            e_s.add( "EEE" );
+            final HashSet<String> f_s = new HashSet<String>();
+            f_s.add( "FFF" );
+            final HashSet<String> g_s = new HashSet<String>();
+            g_s.add( "GGG" );
+            final HashSet<String> h_s = new HashSet<String>();
+            h_s.add( "HHH" );
+            final HashSet<String> i_s = new HashSet<String>();
+            i_s.add( "III" );
+            final HashSet<String> j_s = new HashSet<String>();
+            j_s.add( "JJJ" );
+            final HashSet<String> l_s = new HashSet<String>();
+            l_s.add( "LLL" );
+            final HashSet<String> m_s = new HashSet<String>();
+            m_s.add( "MMM" );
+            final HashSet<String> n_s = new HashSet<String>();
+            n_s.add( "NNN" );
+            final HashSet<String> o_s = new HashSet<String>();
+            o_s.add( "OOO" );
+            final HashSet<String> p_s = new HashSet<String>();
+            p_s.add( "PPP" );
+            final HashSet<String> q_s = new HashSet<String>();
+            q_s.add( "QQQ" );
+            final HashSet<String> r_s = new HashSet<String>();
+            r_s.add( "RRR" );
+            map_same.put( a.getDomainId(), a_s );
+            map_same.put( b.getDomainId(), b_s );
+            map_same.put( c.getDomainId(), c_s );
+            map_same.put( d.getDomainId(), d_s );
+            map_same.put( e.getDomainId(), e_s );
+            map_same.put( f.getDomainId(), f_s );
+            map_same.put( g.getDomainId(), g_s );
+            map_same.put( h.getDomainId(), h_s );
+            map_same.put( i.getDomainId(), i_s );
+            map_same.put( j.getDomainId(), j_s );
+            map_same.put( l.getDomainId(), l_s );
+            map_same.put( m.getDomainId(), m_s );
+            map_same.put( n.getDomainId(), n_s );
+            map_same.put( o.getDomainId(), o_s );
+            map_same.put( p.getDomainId(), p_s );
+            map_same.put( q.getDomainId(), q_s );
+            map_same.put( r.getDomainId(), r_s );
+            final CharacterStateMatrix<BinaryStates> matrix_s = DomainParsimonyCalculator
+                    .createMatrixOfSecondaryFeaturePresenceOrAbsence( gwcd_list, map_same, null );
+            // 1 a b c e f g h l m
+            // 2 a b c e f g i n o
+            // 3 a b d e f g j p q
+            // 4 a b d p r
+            if ( matrix_s.getState( 0, 0 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 1 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 2 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 3 ) != O ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 4 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 5 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 6 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 7 ) != X ) {
+                return false;
+            }
+            if ( matrix_s.getState( 0, 8 ) != O ) {
+                return false;
+            }
+            final PhylogenyFactory factory0 = ParserBasedPhylogenyFactory.getInstance();
+            final String p0_str = "((one,two)1-2,(three,four)3-4)root";
+            final Phylogeny p0 = factory0.create( p0_str, new NHXParser() )[ 0 ];
+            final DomainParsimonyCalculator dp0 = DomainParsimonyCalculator.createInstance( p0, gwcd_list, map_same );
+            dp0.executeDolloParsimonyOnSecondaryFeatures( null );
+            final CharacterStateMatrix<GainLossStates> gl_matrix_d = dp0.getGainLossMatrix();
+            final CharacterStateMatrix<BinaryStates> is_matrix_d = dp0.getInternalStatesMatrix();
+            if ( is_matrix_d.getState( "root", "AAA" ) != X ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "BBB" ) != X ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "CCC" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "DDD" ) != O ) {
+                return false;
+            }
+            if ( is_matrix_d.getState( "root", "EEE" ) != X ) {
+                return false;
+            }
+            if ( gl_matrix_d.getState( "3-4", "PPP" ) != G ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
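+    // Parses the "paup_log_test_1" file and checks the identifiers, the total character count,
+    // and a few individual binary states of the resulting matrix.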
+    private static boolean testPaupLogParser( final File test_dir ) {
+        try {
+            final PaupLogParser parser = new PaupLogParser();
+            parser.setSource( new File( test_dir + ForesterUtil.getFileSeparator() + "paup_log_test_1" ) );
+            final CharacterStateMatrix<BinaryStates> matrix = parser.parse();
+            if ( matrix.getNumberOfIdentifiers() != 8 ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 0 ).equals( "MOUSE" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 1 ).equals( "NEMVE" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 2 ).equals( "MONBE" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 3 ).equals( "DICDI" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 4 ).equals( "ARATH" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 5 ).equals( "6" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 6 ).equals( "7" ) ) {
+                return false;
+            }
+            if ( !matrix.getIdentifier( 7 ).equals( "8" ) ) {
+                return false;
+            }
+            if ( matrix.getNumberOfCharacters() != ( 66 + 66 + 28 ) ) {
+                return false;
+            }
+            if ( matrix.getState( 0, 4 ) != BinaryStates.ABSENT ) {
+                return false;
+            }
+            if ( matrix.getState( 0, 5 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( matrix.getState( 1, 5 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( matrix.getState( 7, 154 ) != BinaryStates.ABSENT ) {
+                return false;
+            }
+            if ( matrix.getState( 7, 155 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( matrix.getState( 7, 156 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( matrix.getState( 7, 157 ) != BinaryStates.ABSENT ) {
+                return false;
+            }
+            if ( matrix.getState( 7, 158 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( matrix.getState( 7, 159 ) != BinaryStates.ABSENT ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
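+    // Checks equals, hashCode, compareTo (case-insensitive ordering, case-sensitive equality),
+    // and whitespace trimming of ProteinId.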
+    private static boolean testProteinId() {
+        try {
+            final ProteinId id1 = new ProteinId( "a" );
+            final ProteinId id2 = new ProteinId( "a" );
+            final ProteinId id3 = new ProteinId( "A" );
+            final ProteinId id4 = new ProteinId( "b" );
+            if ( !id1.equals( id1 ) ) {
+                return false;
+            }
+            if ( id1.getId().equals( "x" ) ) {
+                return false;
+            }
+            if ( id1.getId().equals( null ) ) {
+                return false;
+            }
+            if ( !id1.equals( id2 ) ) {
+                return false;
+            }
+            if ( id1.equals( id3 ) ) {
+                return false;
+            }
+            if ( id1.hashCode() != id1.hashCode() ) {
+                return false;
+            }
+            if ( id1.hashCode() != id2.hashCode() ) {
+                return false;
+            }
+            if ( id1.hashCode() == id3.hashCode() ) {
+                return false;
+            }
+            if ( id1.compareTo( id1 ) != 0 ) {
+                return false;
+            }
+            if ( id1.compareTo( id2 ) != 0 ) {
+                return false;
+            }
+            if ( id1.compareTo( id3 ) != 0 ) {
+                return false;
+            }
+            if ( id1.compareTo( id4 ) >= 0 ) {
+                return false;
+            }
+            if ( id4.compareTo( id1 ) <= 0 ) {
+                return false;
+            }
+            if ( !id4.getId().equals( "b" ) ) {
+                return false;
+            }
+            final ProteinId id5 = new ProteinId( " C " );
+            if ( !id5.getId().equals( "C" ) ) {
+                return false;
+            }
+            if ( id5.equals( id1 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
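+    // Checks equals, hashCode, compareTo (case-insensitive ordering, case-sensitive equality),
+    // and whitespace trimming of BasicSpecies.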
+    private static boolean testSpecies() {
+        try {
+            final Species s1 = new BasicSpecies( "a" );
+            final Species s2 = new BasicSpecies( "a" );
+            final Species s3 = new BasicSpecies( "A" );
+            final Species s4 = new BasicSpecies( "b" );
+            if ( !s1.equals( s1 ) ) {
+                return false;
+            }
+            if ( s1.getSpeciesId().equals( "x" ) ) {
+                return false;
+            }
+            if ( s1.getSpeciesId().equals( null ) ) {
+                return false;
+            }
+            if ( !s1.equals( s2 ) ) {
+                return false;
+            }
+            if ( s1.equals( s3 ) ) {
+                return false;
+            }
+            if ( s1.hashCode() != s1.hashCode() ) {
+                return false;
+            }
+            if ( s1.hashCode() != s2.hashCode() ) {
+                return false;
+            }
+            if ( s1.hashCode() == s3.hashCode() ) {
+                return false;
+            }
+            if ( s1.compareTo( s1 ) != 0 ) {
+                return false;
+            }
+            if ( s1.compareTo( s2 ) != 0 ) {
+                return false;
+            }
+            if ( s1.compareTo( s3 ) != 0 ) {
+                return false;
+            }
+            if ( s1.compareTo( s4 ) >= 0 ) {
+                return false;
+            }
+            if ( s4.compareTo( s1 ) <= 0 ) {
+                return false;
+            }
+            if ( !s4.getSpeciesId().equals( "b" ) ) {
+                return false;
+            }
+            final Species s5 = new BasicSpecies( " C " );
+            if ( !s5.getSpeciesId().equals( "C" ) ) {
+                return false;
+            }
+            if ( s5.equals( s1 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+}
diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java
new file mode 100644 (file)
index 0000000..d27c23c
--- /dev/null
@@ -0,0 +1,7939 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.test;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+
+import org.forester.application.support_transfer;
+import org.forester.development.DevelopmentTools;
+import org.forester.evoinference.TestPhylogenyReconstruction;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix;
+import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
+import org.forester.go.TestGo;
+import org.forester.io.parsers.FastaParser;
+import org.forester.io.parsers.GeneralMsaParser;
+import org.forester.io.parsers.HmmscanPerDomainTableParser;
+import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF;
+import org.forester.io.parsers.nexus.NexusBinaryStatesMatrixParser;
+import org.forester.io.parsers.nexus.NexusCharactersParser;
+import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.parsers.tol.TolParser;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.msa.Mafft;
+import org.forester.msa.Msa;
+import org.forester.msa.MsaInferrer;
+import org.forester.pccx.TestPccx;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyBranch;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.BinaryCharacters;
+import org.forester.phylogeny.data.BranchWidth;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.Distribution;
+import org.forester.phylogeny.data.DomainArchitecture;
+import org.forester.phylogeny.data.Event;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.phylogeny.data.Polygon;
+import org.forester.phylogeny.data.PropertiesMap;
+import org.forester.phylogeny.data.Property;
+import org.forester.phylogeny.data.ProteinDomain;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.data.Property.AppliesTo;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sdi.SDI;
+import org.forester.sdi.SDIR;
+import org.forester.sdi.SDIse;
+import org.forester.sdi.TaxonomyAssigner;
+import org.forester.sdi.TestGSDI;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.Sequence;
+import org.forester.surfacing.Protein;
+import org.forester.surfacing.TestSurfacing;
+import org.forester.tools.ConfidenceAssessor;
+import org.forester.tools.SupportCount;
+import org.forester.tools.TreeSplitMatrix;
+import org.forester.util.AsciiHistogram;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.BasicTable;
+import org.forester.util.BasicTableParser;
+import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+import org.forester.util.GeneralTable;
+import org.forester.ws.uniprot.UniProtTaxonomy;
+import org.forester.ws.uniprot.UniProtWsTools;
+import org.forester.ws.wabi.TxSearch;
+import org.forester.ws.wabi.TxSearch.RANKS;
+import org.forester.ws.wabi.TxSearch.TAX_NAME_CLASS;
+import org.forester.ws.wabi.TxSearch.TAX_RANK;
+
+@SuppressWarnings( "unused" )
+public final class Test {
+
+    private final static double  ZERO_DIFF                 = 1.0E-9;
+    private final static String  PATH_TO_TEST_DATA         = System.getProperty( "user.dir" )
+                                                                   + ForesterUtil.getFileSeparator() + "test_data"
+                                                                   + ForesterUtil.getFileSeparator();
+    private final static String  PATH_TO_RESOURCES         = System.getProperty( "user.dir" )
+                                                                   + ForesterUtil.getFileSeparator() + "resources"
+                                                                   + ForesterUtil.getFileSeparator();
+    private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true;
+    private static final String  PHYLOXML_REMOTE_XSD       = ForesterConstants.PHYLO_XML_LOCATION + "/"
+                                                                   + ForesterConstants.PHYLO_XML_VERSION + "/"
+                                                                   + ForesterConstants.PHYLO_XML_XSD;
+    private static final String  PHYLOXML_LOCAL_XSD        = PATH_TO_RESOURCES + "phyloxml_schema/"
+                                                                   + ForesterConstants.PHYLO_XML_VERSION + "/"
+                                                                   + ForesterConstants.PHYLO_XML_XSD;
+
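+    // Convenience helper: parses a single NHX string and returns the first
+    // resulting Phylogeny.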
+    private final static Phylogeny createPhylogeny( final String nhx ) throws IOException {
+        final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ];
+        return p;
+    }
+
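+    // Returns the Event stored at the last common ancestor (LCA) of the two
+    // named nodes of the given phylogeny.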
+    private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) {
+        final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+        return pm.obtainLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent();
+    }
+
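+    // Floating-point comparison within the ZERO_DIFF (1.0E-9) tolerance; used
+    // throughout these tests instead of exact equality on doubles.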
+    public static boolean isEqual( final double a, final double b ) {
+        return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF );
+    }
+
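+    // Test driver: prints the Java/OS/locale environment, verifies that the
+    // test_data and resources directories are readable, runs each unit test,
+    // tallies successes and failures, and reports running time and memory use.
+    // Tests that depend on external resources (UniProt web service, MAFFT)
+    // do not count towards the failure total.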
+    public static void main( final String[] args ) {
+        System.out.println( "[Java version: " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" );
+        System.out.println( "[OS: " + ForesterUtil.OS_NAME + " " + ForesterUtil.OS_ARCH + " " + ForesterUtil.OS_VERSION
+                + "]" );
+        Locale.setDefault( Locale.US );
+        System.out.println( "[Locale: " + Locale.getDefault() + "]" );
+        int failed = 0;
+        int succeeded = 0;
+        System.out.print( "[Test if directory with files for testing exists/is readable: " );
+        if ( Test.testDir( PATH_TO_TEST_DATA ) ) {
+            System.out.println( "OK.]" );
+        }
+        else {
+            System.out.println( "could not find/read from directory \"" + PATH_TO_TEST_DATA + "\".]" );
+            System.out.println( "Testing aborted." );
+            System.exit( -1 );
+        }
+        System.out.print( "[Test if resources directory exists/is readable: " );
+        if ( testDir( PATH_TO_RESOURCES ) ) {
+            System.out.println( "OK.]" );
+        }
+        else {
+            System.out.println( "could not find/read from directory \"" + Test.PATH_TO_RESOURCES + "\".]" );
+            System.out.println( "Testing aborted." );
+            System.exit( -1 );
+        }
+        final long start_time = new Date().getTime();
+        System.out.print( "Hmmscan output parser: " );
+        if ( testHmmscanOutputParser() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Basic node methods: " );
+        if ( Test.testBasicNodeMethods() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Basic node construction and parsing of NHX (node level): " );
+        if ( Test.testNHXNodeParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "NH parsing: " );
+        if ( Test.testNHParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Conversion to NHX (node level): " );
+        if ( Test.testNHXconversion() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "NHX parsing: " );
+        if ( Test.testNHXParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "NHX parsing with quotes: " );
+        if ( Test.testNHXParsingQuotes() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Nexus characters parsing: " );
+        if ( Test.testNexusCharactersParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Nexus tree parsing: " );
+        if ( Test.testNexusTreeParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Nexus tree parsing (translating): " );
+        if ( Test.testNexusTreeParsingTranslating() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Nexus matrix parsing: " );
+        if ( Test.testNexusMatrixParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Basic phyloXML parsing: " );
+        if ( Test.testBasicPhyloXMLparsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Basic phyloXML parsing (validating against schema): " );
+        if ( testBasicPhyloXMLparsingValidating() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Roundtrip phyloXML parsing (validating against schema): " );
+        if ( Test.testBasicPhyloXMLparsingRoundtrip() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "phyloXML Distribution Element: " );
+        if ( Test.testPhyloXMLparsingOfDistributionElement() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Tol XML parsing: " );
+        if ( Test.testBasicTolXMLparsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Copying of node data: " );
+        if ( Test.testCopyOfNodeData() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Basic tree methods: " );
+        if ( Test.testBasicTreeMethods() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Postorder Iterator: " );
+        if ( Test.testPostOrderIterator() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Preorder Iterator: " );
+        if ( Test.testPreOrderIterator() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Levelorder Iterator: " );
+        if ( Test.testLevelOrderIterator() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Re-id methods: " );
+        if ( Test.testReIdMethods() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Methods on last external nodes: " );
+        if ( Test.testLastExternalNodeMethods() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Methods on external nodes: " );
+        if ( Test.testExternalNodeRelatedMethods() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Deletion of external nodes: " );
+        if ( Test.testDeletionOfExternalNodes() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Subtree deletion: " );
+        if ( Test.testSubtreeDeletion() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Phylogeny branch: " );
+        if ( Test.testPhylogenyBranch() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Rerooting: " );
+        if ( Test.testRerooting() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Midpoint rooting: " );
+        if ( Test.testMidpointrooting() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Support count: " );
+        if ( Test.testSupportCount() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Support transfer: " );
+        if ( Test.testSupportTransfer() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Finding of LCA: " );
+        if ( Test.testGetLCA() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Calculation of distance between nodes: " );
+        if ( Test.testGetDistance() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "SDIse: " );
+        if ( Test.testSDIse() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Taxonomy assigner: " );
+        if ( Test.testTaxonomyAssigner() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "SDIunrooted: " );
+        if ( Test.testSDIunrooted() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "GSDI: " );
+        if ( TestGSDI.test() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Descriptive statistics: " );
+        if ( Test.testDescriptiveStatistics() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Data objects and methods: " );
+        if ( Test.testDataObjects() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Properties map: " );
+        if ( Test.testPropertiesMap() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Phylogeny reconstruction: " );
+        System.out.println();
+        if ( TestPhylogenyReconstruction.test( new File( PATH_TO_TEST_DATA ) ) ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Analysis of domain architectures: " );
+        System.out.println();
+        if ( TestSurfacing.test( new File( PATH_TO_TEST_DATA ) ) ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "GO: " );
+        System.out.println();
+        if ( TestGo.test( new File( PATH_TO_TEST_DATA ) ) ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Modeling tools: " );
+        if ( TestPccx.test() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Split Matrix strict: " );
+        if ( Test.testSplitStrict() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Split Matrix: " );
+        if ( Test.testSplit() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Confidence Assessor: " );
+        if ( Test.testConfidenceAssessor() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Basic table: " );
+        if ( Test.testBasicTable() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "General table: " );
+        if ( Test.testGeneralTable() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Amino acid sequence: " );
+        if ( Test.testAminoAcidSequence() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "General MSA parser: " );
+        if ( Test.testGeneralMsaParser() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Fasta parser for msa: " );
+        if ( Test.testFastaParser() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Creation of balanced phylogeny: " );
+        if ( Test.testCreateBalancedPhylogeny() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+        System.out.print( "Uniprot Taxonomy Search: " );
+        if ( Test.testUniprotTaxonomySearch() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out
+                    .println( "failed [will not count towards failed tests since it might be due to absence of an internet connection]" );
+        }
+        if ( Mafft.isInstalled() ) {
+            System.out.print( "MAFFT (external program): " );
+            if ( Test.testMafft() ) {
+                System.out.println( "OK." );
+                succeeded++;
+            }
+            else {
+                System.out.println( "failed [will not count towards failed tests]" );
+            }
+        }
+        //        System.out.print( "WABI TxSearch: " );
+        //        if ( Test.testWabiTxSearch() ) {
+        //            System.out.println( "OK." );
+        //            succeeded++;
+        //        }
+        //        else {
+        //            System.out
+        //                    .println( "failed [will not count towards failed tests since it might be due to absence of an internet connection]" );
+        //        }
+        System.out.println();
+        final Runtime rt = java.lang.Runtime.getRuntime();
+        final long free_memory = rt.freeMemory() / 1000000;
+        final long total_memory = rt.totalMemory() / 1000000;
+        System.out.println( "Running time    : " + ( new Date().getTime() - start_time ) + "ms " + "(free memory: "
+                + free_memory + "MB, total memory: " + total_memory + "MB)" );
+        System.out.println();
+        System.out.println( "Successful tests: " + succeeded );
+        System.out.println( "Failed     tests: " + failed );
+        System.out.println();
+        if ( failed < 1 ) {
+            System.out.println( "OK." );
+        }
+        else {
+            System.out.println( "Not OK." );
+        }
+        // System.out.println();
+        // Development.setTime( true );
+        //try {
+        //  final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        //  final String clc = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator()
+        //          + "examples" + ForesterUtil.getFileSeparator() + "CLC.nhx";
+        // final String multi = Test.PATH_TO_EXAMPLE_FILES +
+        // "multifurcations_ex_1.nhx";
+        // final String domains = Test.PATH_TO_EXAMPLE_FILES + "domains1.nhx";
+        // final Phylogeny t1 = factory.create( new File( domains ), new
+        // NHXParser() )[ 0 ];
+        //  final Phylogeny t2 = factory.create( new File( clc ), new NHXParser() )[ 0 ];
+        // }
+        // catch ( final Exception e ) {
+        //     e.printStackTrace();
+        // }
+        // t1.getRoot().preorderPrint();
+        // final PhylogenyFactory factory = ParserBasedPhylogenyFactory
+        // .getInstance();
+        // try {
+        //            
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\AtNBSpos.nhx" ) );
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\AtNBSpos.nhx" ),
+        // new NHXParser() );
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\AtNBSpos.nhx" ) );
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\AtNBSpos.nhx" ),
+        // new NHXParser() );
+        //            
+        //
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\big_tree.nhx" ) );
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\big_tree.nhx" ) );
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
+        // new NHXParser() );
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
+        // new NHXParser() );
+        //
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\big_tree.nhx" ) );
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\big_tree.nhx" ) );
+        //
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
+        // new NHXParser() );
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\big_tree.nhx" ),
+        // new NHXParser() );
+        //
+        // Helper.readNHtree( new File( PATH_TO_EXAMPLE_FILES
+        // + "\\AtNBSpos.nhx" ) );
+        // factory.create(
+        // new File( PATH_TO_EXAMPLE_FILES + "\\AtNBSpos.nhx" ),
+        // new NHXParser() );
+        //
+        // }
+        // catch ( IOException e ) {
+        // // TODO Auto-generated catch block
+        // e.printStackTrace();
+        // }
+    }
+
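+    // testBasicNodeMethods():
+    // Checks the static node counter of PhylogenyNode and the construction of
+    // nodes from NHX fragments (PFAM_STYLE_ONLY taxonomy extraction),
+    // including default state (no assigned event, indicator 0, external,
+    // root) and name parsing ("n4:0.01" -> name "n4").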
+    private static boolean testBasicNodeMethods() {
+        try {
+            if ( PhylogenyNode.getNodeCount() != 0 ) {
+                return false;
+            }
+            final PhylogenyNode n1 = new PhylogenyNode();
+            final PhylogenyNode n2 = new PhylogenyNode( "", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            final PhylogenyNode n3 = new PhylogenyNode( "n3", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            final PhylogenyNode n4 = new PhylogenyNode( "n4:0.01", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( n1.isHasAssignedEvent() ) {
+                return false;
+            }
+            if ( PhylogenyNode.getNodeCount() != 4 ) {
+                return false;
+            }
+            if ( n3.getIndicator() != 0 ) {
+                return false;
+            }
+            if ( n3.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            if ( !n3.isExternal() ) {
+                return false;
+            }
+            if ( !n3.isRoot() ) {
+                return false;
+            }
+            if ( !n4.getName().equals( "n4" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
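+    // testBasicPhyloXMLparsing():
+    // Parses phyloxml_test_t1.xml from the test data directory (expected to
+    // contain four phylogenies) with a plain PhyloXmlParser (no schema
+    // explicitly set) and checks tree-level fields (name, identifier,
+    // rootedness, rerootability, type), branch lengths, multiple taxonomies
+    // and sequences per node, and the sequence annotations, properties, URIs,
+    // and location attached to the root node of t3.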
+    private static boolean testBasicPhyloXMLparsing() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_0.length != 4 ) {
+                return false;
+            }
+            final Phylogeny t1 = phylogenies_0[ 0 ];
+            final Phylogeny t2 = phylogenies_0[ 1 ];
+            final Phylogeny t3 = phylogenies_0[ 2 ];
+            final Phylogeny t4 = phylogenies_0[ 3 ];
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            if ( !t1.isRooted() ) {
+                return false;
+            }
+            if ( t1.isRerootable() ) {
+                return false;
+            }
+            if ( !t1.getType().equals( "gene_tree" ) ) {
+                return false;
+            }
+            if ( t2.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "node a" ).getDistanceToParent(), 1.0 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "node b" ).getDistanceToParent(), 2.0 ) ) {
+                return false;
+            }
+            if ( t2.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) {
+                return false;
+            }
+            if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) {
+                return false;
+            }
+            if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) {
+                return false;
+            }
+            if ( t2.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) {
+                return false;
+            }
+            if ( !t2.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence()
+                    .startsWith( "actgtgggggt" ) ) {
+                return false;
+            }
+            if ( !t2.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence()
+                    .startsWith( "ctgtgatgcat" ) ) {
+                return false;
+            }
+            if ( t3.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            if ( !t1.getName().equals( "t1" ) ) {
+                return false;
+            }
+            if ( !t2.getName().equals( "t2" ) ) {
+                return false;
+            }
+            if ( !t3.getName().equals( "t3" ) ) {
+                return false;
+            }
+            if ( !t4.getName().equals( "t4" ) ) {
+                return false;
+            }
+            if ( !t3.getIdentifier().getValue().equals( "1-1" ) ) {
+                return false;
+            }
+            if ( !t3.getIdentifier().getProvider().equals( "treebank" ) ) {
+                return false;
+            }
+            if ( !t3.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) {
+                return false;
+            }
+            if ( !t3.getNode( "root node" ).getNodeData().getSequence().getName()
+                    .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) {
+                return false;
+            }
+            if ( !t3.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) {
+                return false;
+            }
+            if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) {
+                return false;
+            }
+            if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource().equals( "UniProtKB" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getDesc()
+                    .equals( "apoptosis" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef()
+                    .equals( "GO:0006915" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getSource()
+                    .equals( "UniProtKB" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getEvidence()
+                    .equals( "experimental" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getType()
+                    .equals( "function" ) ) {
+                return false;
+            }
+            if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getConfidence()
+                    .getValue() != 1 ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getConfidence()
+                    .getType().equals( "ml" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getDesc()
+                    .equals( "apoptosis" ) ) {
+                return false;
+            }
+            if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getRef()
+                    .equals( "GO:0005829" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc()
+                    .equals( "intracellular organelle" ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription()
+                    .equals( "UniProt link" ) ) ) {
+                return false;
+            }
+            if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) {
+                return false;
+            }
+            //if ( !( t3.getNode( "root node" ).getNodeData().getDistribution().getDesc().equals( "irgendwo" ) ) ) {
+            //     return false;
+            //}
+            //            if ( !( t3.getNode( "root node" ).getNodeData().getReference().getDoi().equals( "10.1074/jbc.M005889200" ) ) ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "root node" ).getNodeData().getTaxonomy().getType().equals( "host" ) ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "root node" ).getNodeData().getTaxonomy().getScientificName().equals( "ecdysozoa" ) ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName().equals( "molting animals" ) ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getType().equals( "ncbi" ) ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getTotalLength() != 124 ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getName()
+            //                    .equals( "B" ) ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getFrom() != 21 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getTo() != 44 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getLength() != 24 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 )
+            //                    .getConfidence() != 2144 ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getId()
+            //                    .equals( "pfam" ) ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bb" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 3 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bb" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node bb" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 1 ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "node bb" ).getNodeData().getBinaryCharacters().getType().equals( "domains" ) ) {
+            //                return false;
+            //            }
+            //            if ( ( ( BinaryCharacters ) t3.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() )
+            //                    .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) {
+            //                ;
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 1 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCount() != 3 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 3 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCount() != 2 ) {
+            //                return false;
+            //            }
+            //            if ( t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) {
+            //                return false;
+            //            }
+            //            if ( !t3.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) {
+            //                return false;
+            //            }
+            //            final Phylogeny[] phylogenies_1 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml",
+            //                                                              xml_parser );
+            //            if ( xml_parser.getErrorCount() > 0 ) {
+            //                System.out.println( xml_parser.getErrorMessages().toString() );
+            //                return false;
+            //            }
+            //            if ( phylogenies_1.length != 2 ) {
+            //                return false;
+            //            }
+            //            final Phylogeny a = phylogenies_1[ 0 ];
+            //            if ( !a.getName().equals( "tree 4" ) ) {
+            //                return false;
+            //            }
+            //            if ( a.getNumberOfExternalNodes() != 3 ) {
+            //                return false;
+            //            }
+            //            if ( !a.getNode( "node b1" ).getNodeData().getSequence().getName().equals( "b1 gene" ) ) {
+            //                return false;
+            //            }
+            //            if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) {
+            //                return false;
+            //            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
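+    // testBasicPhyloXMLparsingRoundtrip():
+    // Parses the same test file with schema validation enabled via
+    // setValidateAgainstSchema (local or remote phyloXML XSD, depending on
+    // USE_LOCAL_PHYLOXML_SCHEMA), writes each phylogeny back out with
+    // toPhyloXML( 0 ), re-parses the result, and verifies that names,
+    // identifiers, taxonomies, sequences, annotations, the literature
+    // reference, domain architectures, and binary characters survive the
+    // roundtrip.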
+    private static boolean testBasicPhyloXMLparsingRoundtrip() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            if ( USE_LOCAL_PHYLOXML_SCHEMA ) {
+                xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD );
+            }
+            else {
+                xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD );
+            }
+            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_0.length != 4 ) {
+                return false;
+            }
+            final StringBuffer t1_sb = new StringBuffer( phylogenies_0[ 0 ].toPhyloXML( 0 ) );
+            final Phylogeny[] phylogenies_t1 = factory.create( t1_sb, xml_parser );
+            if ( phylogenies_t1.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t1_rt = phylogenies_t1[ 0 ];
+            if ( !t1_rt.getDistanceUnit().equals( "cc" ) ) {
+                return false;
+            }
+            if ( !t1_rt.isRooted() ) {
+                return false;
+            }
+            if ( t1_rt.isRerootable() ) {
+                return false;
+            }
+            if ( !t1_rt.getType().equals( "gene_tree" ) ) {
+                return false;
+            }
+            final StringBuffer t2_sb = new StringBuffer( phylogenies_0[ 1 ].toPhyloXML( 0 ) );
+            final Phylogeny[] phylogenies_t2 = factory.create( t2_sb, xml_parser );
+            final Phylogeny t2_rt = phylogenies_t2[ 0 ];
+            if ( t2_rt.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) {
+                return false;
+            }
+            if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) {
+                return false;
+            }
+            if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) {
+                return false;
+            }
+            if ( t2_rt.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) {
+                return false;
+            }
+            if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence()
+                    .startsWith( "actgtgggggt" ) ) {
+                return false;
+            }
+            if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence()
+                    .startsWith( "ctgtgatgcat" ) ) {
+                return false;
+            }
+            final StringBuffer t3_sb_0 = new StringBuffer( phylogenies_0[ 2 ].toPhyloXML( 0 ) );
+            final Phylogeny[] phylogenies_1_0 = factory.create( t3_sb_0, xml_parser );
+            final StringBuffer t3_sb = new StringBuffer( phylogenies_1_0[ 0 ].toPhyloXML( 0 ) );
+            final Phylogeny[] phylogenies_1 = factory.create( t3_sb, xml_parser );
+            if ( phylogenies_1.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t3_rt = phylogenies_1[ 0 ];
+            if ( !t3_rt.getName().equals( "t3" ) ) {
+                return false;
+            }
+            if ( t3_rt.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            if ( !t3_rt.getIdentifier().getValue().equals( "1-1" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getIdentifier().getProvider().equals( "treebank" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getName()
+                    .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource()
+                    .equals( "UniProtKB" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getDesc()
+                    .equals( "apoptosis" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef()
+                    .equals( "GO:0006915" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getSource()
+                    .equals( "UniProtKB" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getEvidence()
+                    .equals( "experimental" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getType()
+                    .equals( "function" ) ) {
+                return false;
+            }
+            if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getConfidence()
+                    .getValue() != 1 ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getConfidence()
+                    .getType().equals( "ml" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getDesc()
+                    .equals( "apoptosis" ) ) {
+                return false;
+            }
+            if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getProperties()
+                    .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getRef()
+                    .equals( "GO:0005829" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc()
+                    .equals( "intracellular organelle" ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription()
+                    .equals( "UniProt link" ) ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDoi().equals( "10.1038/387489a0" ) ) ) {
+                return false;
+            }
+            if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription()
+                    .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getScientificName().equals( "ecdysozoa" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName().equals( "molting animals" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getProvider()
+                    .equals( "ncbi" ) ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getTotalLength() != 124 ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 )
+                    .getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 )
+                    .getFrom() != 21 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getTo() != 44 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 )
+                    .getLength() != 24 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 )
+                    .getConfidence() != 2144 ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getId()
+                    .equals( "pfam" ) ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 3 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 1 ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getType().equals( "domains" ) ) {
+                return false;
+            }
+            final Taxonomy taxbb = t3_rt.getNode( "node bb" ).getNodeData().getTaxonomy();
+            if ( !taxbb.getAuthority().equals( "Stephenson, 1935" ) ) {
+                return false;
+            }
+            if ( !taxbb.getCommonName().equals( "starlet sea anemone" ) ) {
+                return false;
+            }
+            if ( !taxbb.getIdentifier().getProvider().equals( "EOL" ) ) {
+                return false;
+            }
+            if ( !taxbb.getIdentifier().getValue().equals( "704294" ) ) {
+                return false;
+            }
+            if ( !taxbb.getTaxonomyCode().equals( "NEMVE" ) ) {
+                return false;
+            }
+            if ( !taxbb.getScientificName().equals( "Nematostella vectensis" ) ) {
+                return false;
+            }
+            if ( taxbb.getSynonyms().size() != 2 ) {
+                return false;
+            }
+            if ( !taxbb.getSynonyms().contains( "Nematostella vectensis Stephenson1935" ) ) {
+                return false;
+            }
+            if ( !taxbb.getSynonyms().contains( "See Anemone" ) ) {
+                return false;
+            }
+            if ( !taxbb.getUri( 0 ).getDescription().equals( "EOL" ) ) {
+                return false;
+            }
+            if ( !taxbb.getUri( 0 ).getType().equals( "linkout" ) ) {
+                return false;
+            }
+            if ( !taxbb.getUri( 0 ).getValue().toString().equals( "http://www.eol.org/pages/704294" ) ) {
+                return false;
+            }
+            if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() )
+                    .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 1 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCount() != 3 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 3 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCount() != 2 ) {
+                return false;
+            }
+            if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) {
+                return false;
+            }
+            // Clade date annotations (desc, value, min, max, unit).
+            if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getDesc().equals( "Silurian" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getValue().toPlainString()
+                    .equalsIgnoreCase( "435" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMin().toPlainString().equalsIgnoreCase( "416" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMax().toPlainString()
+                    .equalsIgnoreCase( "443.7" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getUnit().equals( "mya" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node bb" ).getNodeData().getDate().getDesc().equals( "Triassic" ) ) {
+                return false;
+            }
+            if ( !t3_rt.getNode( "node bc" ).getNodeData().getDate().getValue().toPlainString()
+                    .equalsIgnoreCase( "433" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
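+    // Parses phyloXML test files with XSD validation enabled (local or remote schema)
+    // and checks tree names, node counts, node data, and special-character handling.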
+    private static boolean testBasicPhyloXMLparsingValidating() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            PhyloXmlParser xml_parser = null;
+            try {
+                xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
+            }
+            catch ( final Exception e ) {
+                // Do nothing -- means we are not running from a jar.
+            }
+            if ( xml_parser == null ) {
+                xml_parser = new PhyloXmlParser();
+                if ( USE_LOCAL_PHYLOXML_SCHEMA ) {
+                    xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD );
+                }
+                else {
+                    xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD );
+                }
+            }
+            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_0.length != 4 ) {
+                return false;
+            }
+            final Phylogeny t1 = phylogenies_0[ 0 ];
+            final Phylogeny t2 = phylogenies_0[ 1 ];
+            final Phylogeny t3 = phylogenies_0[ 2 ];
+            final Phylogeny t4 = phylogenies_0[ 3 ];
+            if ( !t1.getName().equals( "t1" ) ) {
+                return false;
+            }
+            if ( !t2.getName().equals( "t2" ) ) {
+                return false;
+            }
+            if ( !t3.getName().equals( "t3" ) ) {
+                return false;
+            }
+            if ( !t4.getName().equals( "t4" ) ) {
+                return false;
+            }
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            if ( t2.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            if ( t3.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml";
+            final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( "errors:" );
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_1.length != 4 ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( "errors:" );
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_2.length != 1 ) {
+                return false;
+            }
+            if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_3.length != 2 ) {
+                return false;
+            }
+            final Phylogeny a = phylogenies_3[ 0 ];
+            if ( !a.getName().equals( "tree 4" ) ) {
+                return false;
+            }
+            if ( a.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !a.getNode( "node b1" ).getNodeData().getSequence().getName().equals( "b1 gene" ) ) {
+                return false;
+            }
+            if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_4.length != 1 ) {
+                return false;
+            }
+            final Phylogeny s = phylogenies_4[ 0 ];
+            if ( s.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            s.getNode( "first" );
+            s.getNode( "<>" );
+            s.getNode( "\"<a'b&c'd\">\"" );
+            s.getNode( "'''\"" );
+            s.getNode( "\"\"\"" );
+            s.getNode( "dick & doof" );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
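+    // Exercises BasicTable (sparse setValue/getValue access, automatic row/column growth)
+    // and BasicTableParser with different separators and comment markers.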
+    private static boolean testBasicTable() {
+        try {
+            final BasicTable<String> t0 = new BasicTable<String>();
+            if ( t0.getNumberOfColumns() != 0 ) {
+                return false;
+            }
+            if ( t0.getNumberOfRows() != 0 ) {
+                return false;
+            }
+            t0.setValue( 3, 2, "23" );
+            t0.setValue( 10, 1, "error" );
+            t0.setValue( 10, 1, "110" );
+            t0.setValue( 9, 1, "19" );
+            t0.setValue( 1, 10, "101" );
+            t0.setValue( 10, 10, "1010" );
+            t0.setValue( 100, 10, "10100" );
+            t0.setValue( 0, 0, "00" );
+            if ( !t0.getValue( 3, 2 ).equals( "23" ) ) {
+                return false;
+            }
+            if ( !t0.getValue( 10, 1 ).equals( "110" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 1, 10 ).equals( "101" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 10, 10 ).equals( "1010" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 100, 10 ).equals( "10100" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 9, 1 ).equals( "19" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 0, 0 ).equals( "00" ) ) {
+                return false;
+            }
+            if ( t0.getNumberOfColumns() != 101 ) {
+                return false;
+            }
+            if ( t0.getNumberOfRows() != 11 ) {
+                return false;
+            }
+            if ( t0.getValueAsString( 49, 4 ) != null ) {
+                return false;
+            }
+            final String l = ForesterUtil.getLineSeparator();
+            final StringBuffer source = new StringBuffer();
+            source.append( "" + l );
+            source.append( "# 1 1 1 1 1 1 1 1" + l );
+            source.append( " 00 01 02 03" + l );
+            source.append( "   10 11 12 13  " + l );
+            source.append( "20 21 22 23 " + l );
+            source.append( "    30  31    32 33" + l );
+            source.append( "40 41 42 43" + l );
+            source.append( "  # 1 1 1 1 1 " + l );
+            source.append( "50 51 52 53 54" + l );
+            final BasicTable<String> t1 = BasicTableParser.parse( source.toString(), " " );
+            if ( t1.getNumberOfColumns() != 5 ) {
+                return false;
+            }
+            if ( t1.getNumberOfRows() != 6 ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( 0, 0 ).equals( "00" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( 1, 0 ).equals( "01" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( 3, 0 ).equals( "03" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( 4, 5 ).equals( "54" ) ) {
+                return false;
+            }
+            final StringBuffer source1 = new StringBuffer();
+            source1.append( "" + l );
+            source1.append( "# 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l );
+            source1.append( " 00; 01 ;02;03" + l );
+            source1.append( "   10; 11; 12; 13  " + l );
+            source1.append( "20; 21; 22; 23 " + l );
+            source1.append( "    30;  31;    32; 33" + l );
+            source1.append( "40;41;42;43" + l );
+            source1.append( "  # 1 1 1 1 1 " + l );
+            source1.append( ";;;50  ;  ;52; 53;;54   " + l );
+            final BasicTable<String> t2 = BasicTableParser.parse( source1.toString(), ";" );
+            if ( t2.getNumberOfColumns() != 5 ) {
+                return false;
+            }
+            if ( t2.getNumberOfRows() != 6 ) {
+                return false;
+            }
+            if ( !t2.getValueAsString( 0, 0 ).equals( "00" ) ) {
+                return false;
+            }
+            if ( !t2.getValueAsString( 1, 0 ).equals( "01" ) ) {
+                return false;
+            }
+            if ( !t2.getValueAsString( 3, 0 ).equals( "03" ) ) {
+                return false;
+            }
+            if ( !t2.getValueAsString( 3, 3 ).equals( "33" ) ) {
+                return false;
+            }
+            if ( !t2.getValueAsString( 3, 5 ).equals( "53" ) ) {
+                return false;
+            }
+            if ( !t2.getValueAsString( 1, 5 ).equals( "" ) ) {
+                return false;
+            }
+            final StringBuffer source2 = new StringBuffer();
+            source2.append( "" + l );
+            source2.append( "comment: 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l );
+            source2.append( " 00; 01 ;02;03" + l );
+            source2.append( "   10; 11; 12; 13  " + l );
+            source2.append( "20; 21; 22; 23 " + l );
+            source2.append( "                     " + l );
+            source2.append( "    30;  31;    32; 33" + l );
+            source2.append( "40;41;42;43" + l );
+            source2.append( "  comment: 1 1 1 1 1 " + l );
+            source2.append( ";;;50  ;   52; 53;;54   " + l );
+            final List<BasicTable<String>> tl = BasicTableParser.parse( source2.toString(),
+                                                                        ";",
+                                                                        false,
+                                                                        "comment:",
+                                                                        false );
+            if ( tl.size() != 2 ) {
+                return false;
+            }
+            final BasicTable<String> t3 = tl.get( 0 );
+            final BasicTable<String> t4 = tl.get( 1 );
+            if ( t3.getNumberOfColumns() != 4 ) {
+                return false;
+            }
+            if ( t3.getNumberOfRows() != 3 ) {
+                return false;
+            }
+            if ( t4.getNumberOfColumns() != 4 ) {
+                return false;
+            }
+            if ( t4.getNumberOfRows() != 3 ) {
+                return false;
+            }
+            if ( !t3.getValueAsString( 0, 0 ).equals( "00" ) ) {
+                return false;
+            }
+            if ( !t4.getValueAsString( 0, 0 ).equals( "30" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
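+    // Parses Tree of Life (ToL) XML files of various sizes and checks root taxonomy,
+    // identifiers, and descendant counts.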
+    private static boolean testBasicTolXMLparsing() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final TolParser parser = new TolParser();
+            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2484.tol", parser );
+            if ( parser.getErrorCount() > 0 ) {
+                System.out.println( parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_0.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t1 = phylogenies_0[ 0 ];
+            if ( t1.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            if ( !t1.isRooted() ) {
+                return false;
+            }
+            if ( !t1.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Mesozoa" ) ) {
+                return false;
+            }
+            if ( !t1.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2484" ) ) {
+                return false;
+            }
+            if ( !t1.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Rhombozoa" ) ) {
+                return false;
+            }
+            if ( t1.getRoot().getChildNode( 0 ).getNumberOfDescendants() != 3 ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_1 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2.tol", parser );
+            if ( parser.getErrorCount() > 0 ) {
+                System.out.println( parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_1.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t2 = phylogenies_1[ 0 ];
+            if ( t2.getNumberOfExternalNodes() != 664 ) {
+                return false;
+            }
+            if ( !t2.isRooted() ) {
+                return false;
+            }
+            if ( !t2.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Eubacteria" ) ) {
+                return false;
+            }
+            if ( !t2.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2" ) ) {
+                return false;
+            }
+            if ( t2.getRoot().getNumberOfDescendants() != 24 ) {
+                return false;
+            }
+            if ( !t2.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Aquificae" ) ) {
+                return false;
+            }
+            if ( !t2.getRoot().getChildNode( 0 ).getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName()
+                    .equals( "Aquifex" ) ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "tol_5.tol", parser );
+            if ( parser.getErrorCount() > 0 ) {
+                System.out.println( parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_2.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t3 = phylogenies_2[ 0 ];
+            if ( t3.getNumberOfExternalNodes() != 184 ) {
+                return false;
+            }
+            if ( !t3.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Viruses" ) ) {
+                return false;
+            }
+            if ( !t3.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "5" ) ) {
+                return false;
+            }
+            if ( t3.getRoot().getNumberOfDescendants() != 6 ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "tol_4567.tol", parser );
+            if ( parser.getErrorCount() > 0 ) {
+                System.out.println( parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_3.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t4 = phylogenies_3[ 0 ];
+            if ( t4.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            if ( !t4.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Marpissa decorata" ) ) {
+                return false;
+            }
+            if ( !t4.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "4567" ) ) {
+                return false;
+            }
+            if ( t4.getRoot().getNumberOfDescendants() != 0 ) {
+                return false;
+            }
+            final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "tol_16299.tol", parser );
+            if ( parser.getErrorCount() > 0 ) {
+                System.out.println( parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_4.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t5 = phylogenies_4[ 0 ];
+            if ( t5.getNumberOfExternalNodes() != 13 ) {
+                return false;
+            }
+            if ( !t5.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Hominidae" ) ) {
+                return false;
+            }
+            if ( !t5.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "16299" ) ) {
+                return false;
+            }
+            if ( t5.getRoot().getNumberOfDescendants() != 2 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
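+    // Basic Phylogeny operations on NHX-parsed trees: emptiness, external node counts,
+    // height, binary-tree checks, and child node iteration.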
+    private static boolean testBasicTreeMethods() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t1 = factory.create();
+            if ( !t1.isEmpty() ) {
+                return false;
+            }
+            final Phylogeny t2 = factory.create( "((A:1,B:2)AB:1,(C:3,D:5)CD:3)ABCD:0.5", new NHXParser() )[ 0 ];
+            if ( t2.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            if ( t2.getHeight() != 8.5 ) {
+                return false;
+            }
+            if ( !t2.isCompletelyBinary() ) {
+                return false;
+            }
+            if ( t2.isEmpty() ) {
+                return false;
+            }
+            final Phylogeny t3 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3)", new NHXParser() )[ 0 ];
+            if ( t3.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            if ( t3.getHeight() != 11 ) {
+                return false;
+            }
+            if ( t3.isCompletelyBinary() ) {
+                return false;
+            }
+            final PhylogenyNode n = t3.getNode( "ABC" );
+            PhylogenyNodeIterator it;
+            for( it = n.iterateChildNodesForward(); it.hasNext(); ) {
+                it.next();
+            }
+            for( it.reset(); it.hasNext(); ) {
+                it.next();
+            }
+            final PhylogenyNodeIterator it2 = n.iterateChildNodesForward();
+            if ( !it2.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it2.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it2.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( it2.hasNext() ) {
+                return false;
+            }
+            final Phylogeny t4 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3,(F,G,H,I))", new NHXParser() )[ 0 ];
+            if ( t4.getNumberOfExternalNodes() != 9 ) {
+                return false;
+            }
+            if ( t4.getHeight() != 11 ) {
+                return false;
+            }
+            if ( t4.isCompletelyBinary() ) {
+                return false;
+            }
+            final StringBuffer sb5 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" );
+            final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ];
+            if ( t5.getNumberOfExternalNodes() != 8 ) {
+                return false;
+            }
+            if ( t5.getHeight() != 15 ) {
+                return false;
+            }
+            final StringBuffer sb6 = new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" );
+            final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ];
+            if ( t6.getHeight() != 15 ) {
+                return false;
+            }
+            final StringBuffer sb7 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" );
+            final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ];
+            if ( t7.getHeight() != 15 ) {
+                return false;
+            }
+            final StringBuffer sb8 = new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" );
+            final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ];
+            if ( t8.getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( t8.getHeight() != 15 ) {
+                return false;
+            }
+            final char[] a9 = new char[] {};
+            final Phylogeny t9 = factory.create( a9, new NHXParser() )[ 0 ];
+            if ( t9.getHeight() != 0 ) {
+                return false;
+            }
+            final char[] a10 = new char[] { 'a', ':', '6' };
+            final Phylogeny t10 = factory.create( a10, new NHXParser() )[ 0 ];
+            if ( t10.getHeight() != 6 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
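+    // ConfidenceAssessor.evaluate(): maps support values from sets of evidence trees
+    // onto named internal branches of target trees.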
+    private static boolean testConfidenceAssessor() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ];
+            final Phylogeny[] ev0 = factory
+                    .create( "((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);",
+                             new NHXParser() );
+            ConfidenceAssessor.evaluate( "bootstrap", ev0, t0, false, 1, 0, 2 );
+            if ( !isEqual( t0.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) {
+                return false;
+            }
+            if ( !isEqual( t0.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) {
+                return false;
+            }
+            final Phylogeny t1 = factory.create( "((((A,B)ab[&&NHX:B=50],C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ];
+            final Phylogeny[] ev1 = factory
+                    .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));",
+                             new NHXParser() );
+            ConfidenceAssessor.evaluate( "bootstrap", ev1, t1, false, 1 );
+            if ( !isEqual( t1.getNode( "ab" ).getBranchData().getConfidence( 1 ).getValue(), 7 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) {
+                return false;
+            }
+            final Phylogeny t_b = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ];
+            final Phylogeny[] ev_b = factory
+                    .create( "((A,C),X);((A,X),C);(A,C);((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd",
+                             new NHXParser() );
+            ConfidenceAssessor.evaluate( "bootstrap", ev_b, t_b, false, 1 );
+            // Archaeopteryx.createApplication( t_b ); // TODO: re-enable for interactive inspection when revisiting this test.
+            if ( !isEqual( t_b.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 4 ) ) {
+                return false;
+            }
+            if ( !isEqual( t_b.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) {
+                return false;
+            }
+            // Re-evaluate the same topologies with the boolean flag set to true.
+            final Phylogeny t1x = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ];
+            final Phylogeny[] ev1x = factory
+                    .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));",
+                             new NHXParser() );
+            ConfidenceAssessor.evaluate( "bootstrap", ev1x, t1x, true, 1 );
+            if ( !isEqual( t1x.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1x.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) {
+                return false;
+            }
+            final Phylogeny t_bx = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ];
+            final Phylogeny[] ev_bx = factory
+                    .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd",
+                             new NHXParser() );
+            ConfidenceAssessor.evaluate( "bootstrap", ev_bx, t_bx, true, 1 );
+            if ( !isEqual( t_bx.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t_bx.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) {
+                return false;
+            }
+            // Evaluate several alternative target topologies against one evidence set.
+            final Phylogeny[] t2 = factory
+                    .create( "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);",
+                             new NHXParser() );
+            final Phylogeny[] ev2 = factory
+                    .create( "((((a,b),c),d),e);((((a,b),c),d),e);((((a,b),e),d),c);((((a,b),e),d),c);(((a,b),(c,d)),e);((a,b),x);((a,b),(x,y));(a,b);(a,e);(a,b,c);",
+                             new NHXParser() );
+            for( final Phylogeny target : t2 ) {
+                ConfidenceAssessor.evaluate( "bootstrap", ev2, target, false, 1 );
+            }
+            // Larger target tree evaluated against two evidence trees.
+            final Phylogeny t4 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,G)abcdefg",
+                                                 new NHXParser() )[ 0 ];
+            final Phylogeny[] ev4 = factory.create( "(((A,B),C),(X,Y));((F,G),((A,B,C),(D,E)))", new NHXParser() );
+            ConfidenceAssessor.evaluate( "bootstrap", ev4, t4, false, 1 );
+            if ( !isEqual( t4.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t4.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 2 ) ) {
+                return false;
+            }
+            if ( !isEqual( t4.getNode( "abcde" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
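+    // Checks that PhylogenyNode.copyNodeData() preserves all NHX fields.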
+    private static boolean testCopyOfNodeData() {
+        try {
+            final PhylogenyNode n1 = new PhylogenyNode( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1:O=22:SO=33:SN=44:W=2:C=10.20.30:XN=S=tag1=value1=unit1]" );
+            final PhylogenyNode n2 = n1.copyNodeData();
+            if ( !n1.toNewHampshireX().equals( n2.toNewHampshireX() ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
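+    // Equality, copying, and text/NHX rendering of basic data objects: Confidence,
+    // Taxonomy, Identifier, ProteinDomain, DomainArchitecture, and Event.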
+    private static boolean testDataObjects() {
+        try {
+            final Confidence s0 = new Confidence();
+            final Confidence s1 = new Confidence();
+            if ( !s0.isEqual( s1 ) ) {
+                return false;
+            }
+            final Confidence s2 = new Confidence( 0.23, "bootstrap" );
+            final Confidence s3 = new Confidence( 0.23, "bootstrap" );
+            if ( s2.isEqual( s1 ) ) {
+                return false;
+            }
+            if ( !s2.isEqual( s3 ) ) {
+                return false;
+            }
+            final Confidence s4 = ( Confidence ) s3.copy();
+            if ( !s4.isEqual( s3 ) ) {
+                return false;
+            }
+            s3.asSimpleText();
+            s3.asText();
+            // Taxonomy
+            // ----------
+            final Taxonomy t1 = new Taxonomy();
+            final Taxonomy t2 = new Taxonomy();
+            final Taxonomy t3 = new Taxonomy();
+            final Taxonomy t4 = new Taxonomy();
+            final Taxonomy t5 = new Taxonomy();
+            t1.setIdentifier( new Identifier( "ecoli" ) );
+            t1.setTaxonomyCode( "ECOLI" );
+            t1.setScientificName( "E. coli" );
+            t1.setCommonName( "coli" );
+            final Taxonomy t0 = ( Taxonomy ) t1.copy();
+            if ( !t1.isEqual( t0 ) ) {
+                return false;
+            }
+            t2.setIdentifier( new Identifier( "ecoli" ) );
+            t2.setTaxonomyCode( "other" );
+            t2.setScientificName( "what" );
+            t2.setCommonName( "something" );
+            if ( !t1.isEqual( t2 ) ) {
+                return false;
+            }
+            t2.setIdentifier( new Identifier( "nemve" ) );
+            if ( t1.isEqual( t2 ) ) {
+                return false;
+            }
+            t1.setIdentifier( null );
+            t3.setTaxonomyCode( "ECOLI" );
+            t3.setScientificName( "what" );
+            t3.setCommonName( "something" );
+            if ( !t1.isEqual( t3 ) ) {
+                return false;
+            }
+            t1.setIdentifier( null );
+            t1.setTaxonomyCode( "" );
+            t4.setScientificName( "E. ColI" );
+            t4.setCommonName( "something" );
+            if ( !t1.isEqual( t4 ) ) {
+                return false;
+            }
+            t4.setScientificName( "B. subtilis" );
+            t4.setCommonName( "something" );
+            if ( t1.isEqual( t4 ) ) {
+                return false;
+            }
+            t1.setIdentifier( null );
+            t1.setTaxonomyCode( "" );
+            t1.setScientificName( "" );
+            t5.setCommonName( "COLI" );
+            if ( !t1.isEqual( t5 ) ) {
+                return false;
+            }
+            t5.setCommonName( "vibrio" );
+            if ( t1.isEqual( t5 ) ) {
+                return false;
+            }
+            // Identifier
+            // ----------
+            final Identifier id0 = new Identifier( "123", "pfam" );
+            final Identifier id1 = ( Identifier ) id0.copy();
+            if ( !id1.isEqual( id1 ) ) {
+                return false;
+            }
+            if ( !id1.isEqual( id0 ) ) {
+                return false;
+            }
+            if ( !id0.isEqual( id1 ) ) {
+                return false;
+            }
+            id1.asSimpleText();
+            id1.asText();
+            // ProteinDomain
+            // ---------------
+            final ProteinDomain pd0 = new ProteinDomain( "abc", 100, 200 );
+            final ProteinDomain pd1 = ( ProteinDomain ) pd0.copy();
+            if ( !pd1.isEqual( pd1 ) ) {
+                return false;
+            }
+            if ( !pd1.isEqual( pd0 ) ) {
+                return false;
+            }
+            pd1.asSimpleText();
+            pd1.asText();
+            final ProteinDomain pd2 = new ProteinDomain( pd0.getName(), pd0.getFrom(), pd0.getTo(), "id" );
+            final ProteinDomain pd3 = ( ProteinDomain ) pd2.copy();
+            if ( !pd3.isEqual( pd3 ) ) {
+                return false;
+            }
+            if ( !pd2.isEqual( pd3 ) ) {
+                return false;
+            }
+            if ( !pd0.isEqual( pd3 ) ) {
+                return false;
+            }
+            pd3.asSimpleText();
+            pd3.asText();
+            // DomainArchitecture
+            // ------------------
+            final ProteinDomain d0 = new ProteinDomain( "domain0", 10, 20 );
+            final ProteinDomain d1 = new ProteinDomain( "domain1", 30, 40 );
+            final ProteinDomain d2 = new ProteinDomain( "domain2", 50, 60 );
+            final ProteinDomain d3 = new ProteinDomain( "domain3", 70, 80 );
+            final ProteinDomain d4 = new ProteinDomain( "domain4", 90, 100 );
+            final ArrayList<PhylogenyData> domains0 = new ArrayList<PhylogenyData>();
+            domains0.add( d2 );
+            domains0.add( d0 );
+            domains0.add( d3 );
+            domains0.add( d1 );
+            final DomainArchitecture ds0 = new DomainArchitecture( domains0, 110 );
+            if ( ds0.getNumberOfDomains() != 4 ) {
+                return false;
+            }
+            final DomainArchitecture ds1 = ( DomainArchitecture ) ds0.copy();
+            if ( !ds0.isEqual( ds0 ) ) {
+                return false;
+            }
+            if ( !ds0.isEqual( ds1 ) ) {
+                return false;
+            }
+            if ( ds1.getNumberOfDomains() != 4 ) {
+                return false;
+            }
+            final ArrayList<PhylogenyData> domains1 = new ArrayList<PhylogenyData>();
+            domains1.add( d1 );
+            domains1.add( d2 );
+            domains1.add( d4 );
+            domains1.add( d0 );
+            final DomainArchitecture ds2 = new DomainArchitecture( domains1, 200 );
+            if ( ds0.isEqual( ds2 ) ) {
+                return false;
+            }
+            ds1.asSimpleText();
+            ds1.asText();
+            ds1.toNHX();
+            final DomainArchitecture ds3 = new DomainArchitecture( "120>30>40>0.9>b>50>60>0.4>c>10>20>0.1>a" );
+            if ( !ds3.toNHX().toString().equals( ":DS=120>10>20>0.1>a>30>40>0.9>b>50>60>0.4>c" ) ) {
+                System.out.println( ds3.toNHX() );
+                return false;
+            }
+            if ( ds3.getNumberOfDomains() != 3 ) {
+                return false;
+            }
+            // Event
+            // -----
+            final Event e1 = new Event( Event.EventType.fusion );
+            if ( e1.isDuplication() ) {
+                return false;
+            }
+            if ( !e1.isFusion() ) {
+                return false;
+            }
+            if ( !e1.asText().toString().equals( "fusion" ) ) {
+                return false;
+            }
+            if ( !e1.asSimpleText().toString().equals( "fusion" ) ) {
+                return false;
+            }
+            final Event e11 = new Event( Event.EventType.fusion );
+            if ( !e11.isEqual( e1 ) ) {
+                return false;
+            }
+            if ( !e11.toNHX().toString().equals( "" ) ) {
+                return false;
+            }
+            final Event e2 = new Event( Event.EventType.speciation_or_duplication );
+            if ( e2.isDuplication() ) {
+                return false;
+            }
+            if ( !e2.isSpeciationOrDuplication() ) {
+                return false;
+            }
+            if ( !e2.asText().toString().equals( "speciation_or_duplication" ) ) {
+                return false;
+            }
+            if ( !e2.asSimpleText().toString().equals( "?" ) ) {
+                return false;
+            }
+            if ( !e2.toNHX().toString().equals( ":D=?" ) ) {
+                return false;
+            }
+            if ( e11.isEqual( e2 ) ) {
+                return false;
+            }
+            final Event e2c = ( Event ) e2.copy();
+            if ( !e2c.isEqual( e2 ) ) {
+                return false;
+            }
+            Event e3 = new Event( 1, 2, 3 );
+            if ( e3.isDuplication() ) {
+                return false;
+            }
+            if ( e3.isSpeciation() ) {
+                return false;
+            }
+            if ( e3.isGeneLoss() ) {
+                return false;
+            }
+            if ( !e3.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) {
+                return false;
+            }
+            final Event e3c = ( Event ) e3.copy();
+            final Event e3cc = ( Event ) e3c.copy();
+            if ( !e3c.asSimpleText().toString().equals( "D2S3L" ) ) {
+                return false;
+            }
+            e3 = null;
+            if ( !e3c.isEqual( e3cc ) ) {
+                return false;
+            }
+            Event e4 = new Event( 1, 2, 3 );
+            if ( !e4.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) {
+                return false;
+            }
+            if ( !e4.asSimpleText().toString().equals( "D2S3L" ) ) {
+                return false;
+            }
+            final Event e4c = ( Event ) e4.copy();
+            e4 = null;
+            final Event e4cc = ( Event ) e4c.copy();
+            if ( !e4cc.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) {
+                return false;
+            }
+            if ( !e4c.isEqual( e4cc ) ) {
+                return false;
+            }
+            final Event e5 = new Event();
+            if ( !e5.isUnassigned() ) {
+                return false;
+            }
+            if ( !e5.asText().toString().equals( "unassigned" ) ) {
+                return false;
+            }
+            if ( !e5.asSimpleText().toString().equals( "" ) ) {
+                return false;
+            }
+            final Event e6 = new Event( 1, 0, 0 );
+            if ( !e6.asText().toString().equals( "duplication" ) ) {
+                return false;
+            }
+            if ( !e6.asSimpleText().toString().equals( "D" ) ) {
+                return false;
+            }
+            final Event e7 = new Event( 0, 1, 0 );
+            if ( !e7.asText().toString().equals( "speciation" ) ) {
+                return false;
+            }
+            if ( !e7.asSimpleText().toString().equals( "S" ) ) {
+                return false;
+            }
+            final Event e8 = new Event( 0, 0, 1 );
+            if ( !e8.asText().toString().equals( "gene-loss" ) ) {
+                return false;
+            }
+            if ( !e8.asSimpleText().toString().equals( "L" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
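+    // deleteSubtree() on external nodes: node counts, resulting topologies (checked
+    // via New Hampshire output), and branch length summation after collapsing.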
+    private static boolean testDeletionOfExternalNodes() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0 = factory.create( "A", new NHXParser() )[ 0 ];
+            final PhylogenyWriter w = new PhylogenyWriter();
+            if ( t0.isEmpty() ) {
+                return false;
+            }
+            if ( t0.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            t0.deleteSubtree( t0.getNode( "A" ), false );
+            if ( t0.getNumberOfExternalNodes() != 0 ) {
+                return false;
+            }
+            if ( !t0.isEmpty() ) {
+                return false;
+            }
+            final Phylogeny t1 = factory.create( "(A,B)r", new NHXParser() )[ 0 ];
+            if ( t1.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "A" ), false );
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            if ( !t1.getNode( "B" ).getName().equals( "B" ) ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "B" ), false );
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "r" ), false );
+            if ( !t1.isEmpty() ) {
+                return false;
+            }
+            final Phylogeny t2 = factory.create( "((A,B),C)", new NHXParser() )[ 0 ];
+            if ( t2.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            t2.deleteSubtree( t2.getNode( "B" ), false );
+            if ( t2.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            t2.toNewHampshireX();
+            PhylogenyNode n = t2.getNode( "A" );
+            if ( !n.getNextExternalNode().getName().equals( "C" ) ) {
+                return false;
+            }
+            t2.deleteSubtree( t2.getNode( "A" ), false );
+            if ( t2.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            t2.deleteSubtree( t2.getNode( "C" ), true );
+            if ( t2.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            final Phylogeny t3 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ];
+            if ( t3.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            t3.deleteSubtree( t3.getNode( "B" ), true );
+            if ( t3.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            n = t3.getNode( "A" );
+            if ( !n.getNextExternalNode().getName().equals( "C" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getNextExternalNode().getName().equals( "D" ) ) {
+                return false;
+            }
+            t3.deleteSubtree( t3.getNode( "A" ), true );
+            if ( t3.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            n = t3.getNode( "C" );
+            if ( !n.getNextExternalNode().getName().equals( "D" ) ) {
+                return false;
+            }
+            t3.deleteSubtree( t3.getNode( "C" ), true );
+            if ( t3.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            t3.deleteSubtree( t3.getNode( "D" ), true );
+            if ( t3.getNumberOfExternalNodes() != 0 ) {
+                return false;
+            }
+            final Phylogeny t4 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            if ( t4.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            t4.deleteSubtree( t4.getNode( "B2" ), true );
+            if ( t4.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            String s = w.toNewHampshire( t4, false, true ).toString();
+            if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) {
+                return false;
+            }
+            t4.deleteSubtree( t4.getNode( "B11" ), true );
+            if ( t4.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            t4.deleteSubtree( t4.getNode( "C" ), true );
+            if ( t4.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            n = t4.getNode( "A" );
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "B12" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "D" ) ) {
+                return false;
+            }
+            s = w.toNewHampshire( t4, false, true ).toString();
+            if ( !s.equals( "((A,B12),D);" ) ) {
+                return false;
+            }
+            final Phylogeny t5 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            t5.deleteSubtree( t5.getNode( "A" ), true );
+            if ( t5.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t5, false, true ).toString();
+            if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) {
+                return false;
+            }
+            final Phylogeny t6 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            t6.deleteSubtree( t6.getNode( "B11" ), true );
+            if ( t6.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t6, false, false ).toString();
+            if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) {
+                return false;
+            }
+            final Phylogeny t7 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            t7.deleteSubtree( t7.getNode( "B12" ), true );
+            if ( t7.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t7, false, true ).toString();
+            if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) {
+                return false;
+            }
+            final Phylogeny t8 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            t8.deleteSubtree( t8.getNode( "B2" ), true );
+            if ( t8.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t8, false, false ).toString();
+            if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) {
+                return false;
+            }
+            final Phylogeny t9 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            t9.deleteSubtree( t9.getNode( "C" ), true );
+            if ( t9.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t9, false, true ).toString();
+            if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) {
+                return false;
+            }
+            final Phylogeny t10 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ];
+            t10.deleteSubtree( t10.getNode( "D" ), true );
+            if ( t10.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t10, false, true ).toString();
+            if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) {
+                return false;
+            }
+            final Phylogeny t11 = factory.create( "(A,B,C)", new NHXParser() )[ 0 ];
+            t11.deleteSubtree( t11.getNode( "A" ), true );
+            if ( t11.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t11, false, true ).toString();
+            if ( !s.equals( "(B,C);" ) ) {
+                return false;
+            }
+            t11.deleteSubtree( t11.getNode( "C" ), true );
+            if ( t11.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t11, false, false ).toString();
+            if ( !s.equals( "B;" ) ) {
+                return false;
+            }
+            final Phylogeny t12 = factory.create( "((A1,A2,A3),(B1,B2,B3),(C1,C2,C3))", new NHXParser() )[ 0 ];
+            t12.deleteSubtree( t12.getNode( "B2" ), true );
+            if ( t12.getNumberOfExternalNodes() != 8 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) {
+                return false;
+            }
+            t12.deleteSubtree( t12.getNode( "B3" ), true );
+            if ( t12.getNumberOfExternalNodes() != 7 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) {
+                return false;
+            }
+            t12.deleteSubtree( t12.getNode( "C3" ), true );
+            if ( t12.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) {
+                return false;
+            }
+            t12.deleteSubtree( t12.getNode( "A1" ), true );
+            if ( t12.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) {
+                return false;
+            }
+            t12.deleteSubtree( t12.getNode( "B1" ), true );
+            if ( t12.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "((A2,A3),(C1,C2));" ) ) {
+                return false;
+            }
+            t12.deleteSubtree( t12.getNode( "A3" ), true );
+            if ( t12.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "(A2,(C1,C2));" ) ) {
+                return false;
+            }
+            t12.deleteSubtree( t12.getNode( "A2" ), true );
+            if ( t12.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t12, false, true ).toString();
+            if ( !s.equals( "(C1,C2);" ) ) {
+                return false;
+            }
+            final Phylogeny t13 = factory.create( "(A,B,C,(D:1.0,E:2.0):3.0)", new NHXParser() )[ 0 ];
+            t13.deleteSubtree( t13.getNode( "D" ), true );
+            if ( t13.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t13, false, true ).toString();
+            if ( !s.equals( "(A,B,C,E:5.0);" ) ) {
+                return false;
+            }
+            final Phylogeny t14 = factory.create( "((A,B,C,(D:0.1,E:0.4):1.0),F)", new NHXParser() )[ 0 ];
+            t14.deleteSubtree( t14.getNode( "E" ), true );
+            if ( t14.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            s = w.toNewHampshire( t14, false, true ).toString();
+            if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) {
+                return false;
+            }
+            final Phylogeny t15 = factory.create( "((A1,A2,A3,A4),(B1,B2,B3,B4),(C1,C2,C3,C4))", new NHXParser() )[ 0 ];
+            t15.deleteSubtree( t15.getNode( "B2" ), true );
+            if ( t15.getNumberOfExternalNodes() != 11 ) {
+                return false;
+            }
+            t15.deleteSubtree( t15.getNode( "B1" ), true );
+            if ( t15.getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            t15.deleteSubtree( t15.getNode( "B3" ), true );
+            if ( t15.getNumberOfExternalNodes() != 9 ) {
+                return false;
+            }
+            t15.deleteSubtree( t15.getNode( "B4" ), true );
+            if ( t15.getNumberOfExternalNodes() != 8 ) {
+                return false;
+            }
+            t15.deleteSubtree( t15.getNode( "A1" ), true );
+            if ( t15.getNumberOfExternalNodes() != 7 ) {
+                return false;
+            }
+            t15.deleteSubtree( t15.getNode( "C4" ), true );
+            if ( t15.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
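+    // Exercises BasicDescriptiveStatistics. For the first sample {82, 78, 70, 58, 42}
+    // the expected values follow from the definitions, e.g. arithmetic mean
+    // (82+78+70+58+42)/5 = 66, midrange (42+82)/2 = 62, and sample variance
+    // (16^2+12^2+4^2+8^2+24^2)/(5-1) = 264; binning via performBinning() and
+    // AsciiHistogram output are exercised further below.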
+    private static boolean testDescriptiveStatistics() {
+        try {
+            final DescriptiveStatistics dss1 = new BasicDescriptiveStatistics();
+            dss1.addValue( 82 );
+            dss1.addValue( 78 );
+            dss1.addValue( 70 );
+            dss1.addValue( 58 );
+            dss1.addValue( 42 );
+            if ( dss1.getN() != 5 ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.getMin(), 42 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.getMax(), 82 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.arithmeticMean(), 66 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.sampleStandardDeviation(), 16.24807680927192 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.median(), 70 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.midrange(), 62 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.sampleVariance(), 264 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.pearsonianSkewness(), -0.7385489458759964 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.coefficientOfVariation(), 0.24618298195866547 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.sampleStandardUnit( 66 - 16.24807680927192 ), -1.0 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.getValue( 1 ), 78 ) ) {
+                return false;
+            }
+            dss1.addValue( 123 );
+            if ( !Test.isEqual( dss1.arithmeticMean(), 75.5 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.getMax(), 123 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss1.standardErrorOfMean(), 11.200446419674531 ) ) {
+                return false;
+            }
+            final DescriptiveStatistics dss2 = new BasicDescriptiveStatistics();
+            dss2.addValue( -1.85 );
+            dss2.addValue( 57.5 );
+            dss2.addValue( 92.78 );
+            dss2.addValue( 57.78 );
+            if ( !Test.isEqual( dss2.median(), 57.64 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss2.sampleStandardDeviation(), 39.266984753946495 ) ) {
+                return false;
+            }
+            final double[] a = dss2.getDataAsDoubleArray();
+            if ( !Test.isEqual( a[ 3 ], 57.78 ) ) {
+                return false;
+            }
+            dss2.addValue( -100 );
+            if ( !Test.isEqual( dss2.sampleStandardDeviation(), 75.829111296388 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( dss2.sampleVariance(), 5750.05412 ) ) {
+                return false;
+            }
+            final double[] ds = new double[ 14 ];
+            ds[ 0 ] = 34;
+            ds[ 1 ] = 23;
+            ds[ 2 ] = 1;
+            ds[ 3 ] = 32;
+            ds[ 4 ] = 11;
+            ds[ 5 ] = 2;
+            ds[ 6 ] = 12;
+            ds[ 7 ] = 33;
+            ds[ 8 ] = 13;
+            ds[ 9 ] = 22;
+            ds[ 10 ] = 21;
+            ds[ 11 ] = 35;
+            ds[ 12 ] = 24;
+            ds[ 13 ] = 31;
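+            // Binning these 14 values over [0, 40] into 4 bins of width 10 is
+            // expected to yield counts of 2, 3, 4 and 5, as checked below.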
+            final int[] bins = BasicDescriptiveStatistics.performBinning( ds, 0, 40, 4 );
+            if ( bins.length != 4 ) {
+                return false;
+            }
+            if ( bins[ 0 ] != 2 ) {
+                return false;
+            }
+            if ( bins[ 1 ] != 3 ) {
+                return false;
+            }
+            if ( bins[ 2 ] != 4 ) {
+                return false;
+            }
+            if ( bins[ 3 ] != 5 ) {
+                return false;
+            }
+            final double[] ds1 = new double[ 9 ];
+            ds1[ 0 ] = 10.0;
+            ds1[ 1 ] = 19.0;
+            ds1[ 2 ] = 9.999;
+            ds1[ 3 ] = 0.0;
+            ds1[ 4 ] = 39.9;
+            ds1[ 5 ] = 39.999;
+            ds1[ 6 ] = 30.0;
+            ds1[ 7 ] = 19.999;
+            ds1[ 8 ] = 30.1;
+            final int[] bins1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 4 );
+            if ( bins1.length != 4 ) {
+                return false;
+            }
+            if ( bins1[ 0 ] != 2 ) {
+                return false;
+            }
+            if ( bins1[ 1 ] != 3 ) {
+                return false;
+            }
+            if ( bins1[ 2 ] != 0 ) {
+                return false;
+            }
+            if ( bins1[ 3 ] != 4 ) {
+                return false;
+            }
+            final int[] bins1_1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 3 );
+            if ( bins1_1.length != 3 ) {
+                return false;
+            }
+            if ( bins1_1[ 0 ] != 3 ) {
+                return false;
+            }
+            if ( bins1_1[ 1 ] != 2 ) {
+                return false;
+            }
+            if ( bins1_1[ 2 ] != 4 ) {
+                return false;
+            }
+            final int[] bins1_2 = BasicDescriptiveStatistics.performBinning( ds1, 1, 39, 3 );
+            if ( bins1_2.length != 3 ) {
+                return false;
+            }
+            if ( bins1_2[ 0 ] != 2 ) {
+                return false;
+            }
+            if ( bins1_2[ 1 ] != 2 ) {
+                return false;
+            }
+            if ( bins1_2[ 2 ] != 2 ) {
+                return false;
+            }
+            final DescriptiveStatistics dss3 = new BasicDescriptiveStatistics();
+            dss3.addValue( 1 );
+            dss3.addValue( 1 );
+            dss3.addValue( 1 );
+            dss3.addValue( 2 );
+            dss3.addValue( 3 );
+            dss3.addValue( 4 );
+            dss3.addValue( 5 );
+            dss3.addValue( 5 );
+            dss3.addValue( 5 );
+            dss3.addValue( 6 );
+            dss3.addValue( 7 );
+            dss3.addValue( 8 );
+            dss3.addValue( 9 );
+            dss3.addValue( 10 );
+            dss3.addValue( 10 );
+            dss3.addValue( 10 );
+            final AsciiHistogram histo = new AsciiHistogram( dss3 );
+            histo.toStringBuffer( 10, '=', 40, 5 );
+            histo.toStringBuffer( 3, 8, 10, '=', 40, 5 );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
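+    // Sanity check that the given path exists, is a directory, and is readable.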
+    private static boolean testDir( final String file ) {
+        try {
+            final File f = new File( file );
+            if ( !f.exists() ) {
+                return false;
+            }
+            if ( !f.isDirectory() ) {
+                return false;
+            }
+            if ( !f.canRead() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            return false;
+        }
+        return true;
+    }
+
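+    // Walks the external (leaf) nodes of several small trees via getNextExternalNode(),
+    // isLastExternalNode() and iteratorExternalForward(), expecting the leaves to be
+    // visited in left-to-right order.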
+    private static boolean testExternalNodeRelatedMethods() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t1 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ];
+            PhylogenyNode n = t1.getNode( "A" );
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "B" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "C" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "D" ) ) {
+                return false;
+            }
+            n = t1.getNode( "B" );
+            while ( !n.isLastExternalNode() ) {
+                n = n.getNextExternalNode();
+            }
+            final Phylogeny t2 = factory.create( "(((A,B),C),D)", new NHXParser() )[ 0 ];
+            n = t2.getNode( "A" );
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "B" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "C" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "D" ) ) {
+                return false;
+            }
+            n = t2.getNode( "B" );
+            while ( !n.isLastExternalNode() ) {
+                n = n.getNextExternalNode();
+            }
+            final Phylogeny t3 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ];
+            n = t3.getNode( "A" );
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "B" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "C" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "D" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "E" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "F" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "G" ) ) {
+                return false;
+            }
+            n = n.getNextExternalNode();
+            if ( !n.getName().equals( "H" ) ) {
+                return false;
+            }
+            n = t3.getNode( "B" );
+            while ( !n.isLastExternalNode() ) {
+                n = n.getNextExternalNode();
+            }
+            final Phylogeny t4 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ];
+            for( final PhylogenyNodeIterator iter = t4.iteratorExternalForward(); iter.hasNext(); ) {
+                iter.next();
+            }
+            final Phylogeny t5 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ];
+            for( final PhylogenyNodeIterator iter = t5.iteratorExternalForward(); iter.hasNext(); ) {
+                iter.next();
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
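+    // Exercises GeneralTable with Integer and with String keys: a later setValue()
+    // overwrites an earlier one for the same cell, and getValueAsString() is expected
+    // to return "" for cells that were never set.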
+    private static boolean testGeneralTable() {
+        try {
+            final GeneralTable<Integer, String> t0 = new GeneralTable<Integer, String>();
+            t0.setValue( 3, 2, "23" );
+            t0.setValue( 10, 1, "error" );
+            t0.setValue( 10, 1, "110" );
+            t0.setValue( 9, 1, "19" );
+            t0.setValue( 1, 10, "101" );
+            t0.setValue( 10, 10, "1010" );
+            t0.setValue( 100, 10, "10100" );
+            t0.setValue( 0, 0, "00" );
+            if ( !t0.getValue( 3, 2 ).equals( "23" ) ) {
+                return false;
+            }
+            if ( !t0.getValue( 10, 1 ).equals( "110" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 1, 10 ).equals( "101" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 10, 10 ).equals( "1010" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 100, 10 ).equals( "10100" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 9, 1 ).equals( "19" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 0, 0 ).equals( "00" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 49, 4 ).equals( "" ) ) {
+                return false;
+            }
+            if ( !t0.getValueAsString( 22349, 3434344 ).equals( "" ) ) {
+                return false;
+            }
+            final GeneralTable<String, String> t1 = new GeneralTable<String, String>();
+            t1.setValue( "3", "2", "23" );
+            t1.setValue( "10", "1", "error" );
+            t1.setValue( "10", "1", "110" );
+            t1.setValue( "9", "1", "19" );
+            t1.setValue( "1", "10", "101" );
+            t1.setValue( "10", "10", "1010" );
+            t1.setValue( "100", "10", "10100" );
+            t1.setValue( "0", "0", "00" );
+            t1.setValue( "qwerty", "zxcvbnm", "asdef" );
+            if ( !t1.getValue( "3", "2" ).equals( "23" ) ) {
+                return false;
+            }
+            if ( !t1.getValue( "10", "1" ).equals( "110" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "1", "10" ).equals( "101" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "10", "10" ).equals( "1010" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "100", "10" ).equals( "10100" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "9", "1" ).equals( "19" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "0", "0" ).equals( "00" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "qwerty", "zxcvbnm" ).equals( "asdef" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "49", "4" ).equals( "" ) ) {
+                return false;
+            }
+            if ( !t1.getValueAsString( "22349", "3434344" ).equals( "" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
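+    // Checks PhylogenyMethods.calculateDistance() on trees with branch lengths: the
+    // distance between two nodes is the sum of branch lengths on the path through
+    // their last common ancestor (e.g. A-B in p1 is 1 + 2 = 3, A-C is 1 + 3 + 4 = 8)
+    // and is expected to be symmetric in its arguments.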
+    private static boolean testGetDistance() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p1 = factory.create( "(((A:1,B:2,X:100)ab:3,C:4)abc:5,(D:7,(E:9,F:10)ef:8)def:6)r",
+                                                 new NHXParser() )[ 0 ];
+            final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+            if ( pm.calculateDistance( p1.getNode( "C" ), p1.getNode( "C" ) ) != 0 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "def" ) ) != 0 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ef" ) ) != 0 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "r" ), p1.getNode( "r" ) ) != 0 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "A" ) ) != 0 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "B" ) ) != 3 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "B" ), p1.getNode( "A" ) ) != 3 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "C" ) ) != 8 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "C" ), p1.getNode( "A" ) ) != 8 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "D" ) ) != 22 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "E" ) ) != 32 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "E" ), p1.getNode( "A" ) ) != 32 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "F" ) ) != 33 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "F" ), p1.getNode( "A" ) ) != 33 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "ab" ) ) != 1 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ab" ), p1.getNode( "A" ) ) != 1 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "abc" ) ) != 4 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "abc" ), p1.getNode( "A" ) ) != 4 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "r" ) ) != 9 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "r" ), p1.getNode( "A" ) ) != 9 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "def" ) ) != 15 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "A" ) ) != 15 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "A" ), p1.getNode( "ef" ) ) != 23 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "A" ) ) != 23 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "def" ) ) != 8 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "ef" ) ) != 8 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "r" ) ) != 14 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "abc" ) ) != 19 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ef" ), p1.getNode( "ab" ) ) != 22 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "ab" ), p1.getNode( "ef" ) ) != 22 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p1.getNode( "def" ), p1.getNode( "abc" ) ) != 11 ) {
+                return false;
+            }
+            final Phylogeny p2 = factory.create( "((A:4,B:5,C:6)abc:1,(D:7,E:8,F:9)def:2,(G:10,H:11,I:12)ghi:3)r",
+                                                 new NHXParser() )[ 0 ];
+            if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "B" ) ) != 9 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "C" ) ) != 10 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "D" ) ) != 14 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "ghi" ) ) != 8 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "A" ), p2.getNode( "I" ) ) != 20 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "ghi" ) ) != 10 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "r" ), p2.getNode( "r" ) ) != 0 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "r" ), p2.getNode( "G" ) ) != 13 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "r" ) ) != 13 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "H" ) ) != 21 ) {
+                return false;
+            }
+            if ( pm.calculateDistance( p2.getNode( "G" ), p2.getNode( "I" ) ) != 22 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
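+    // Checks PhylogenyMethods.obtainLCA(): the last common ancestor of two nodes
+    // (including a node with itself or with one of its own ancestors) is identified
+    // by the name of the expected internal node; argument order should not matter.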
+    private static boolean testGetLCA() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p1 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,(G,H)gh)abcdefgh",
+                                                 new NHXParser() )[ 0 ];
+            final PhylogenyMethods pm = PhylogenyMethods.getInstance();
+            final PhylogenyNode A = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "A" ) );
+            if ( !A.getName().equals( "A" ) ) {
+                return false;
+            }
+            final PhylogenyNode gh = pm.obtainLCA( p1.getNode( "gh" ), p1.getNode( "gh" ) );
+            if ( !gh.getName().equals( "gh" ) ) {
+                return false;
+            }
+            final PhylogenyNode ab = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "B" ) );
+            if ( !ab.getName().equals( "ab" ) ) {
+                return false;
+            }
+            final PhylogenyNode ab2 = pm.obtainLCA( p1.getNode( "B" ), p1.getNode( "A" ) );
+            if ( !ab2.getName().equals( "ab" ) ) {
+                return false;
+            }
+            final PhylogenyNode gh2 = pm.obtainLCA( p1.getNode( "H" ), p1.getNode( "G" ) );
+            if ( !gh2.getName().equals( "gh" ) ) {
+                return false;
+            }
+            final PhylogenyNode gh3 = pm.obtainLCA( p1.getNode( "G" ), p1.getNode( "H" ) );
+            if ( !gh3.getName().equals( "gh" ) ) {
+                return false;
+            }
+            final PhylogenyNode abc = pm.obtainLCA( p1.getNode( "C" ), p1.getNode( "A" ) );
+            if ( !abc.getName().equals( "abc" ) ) {
+                return false;
+            }
+            final PhylogenyNode abc2 = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "C" ) );
+            if ( !abc2.getName().equals( "abc" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcd = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "D" ) );
+            if ( !abcd.getName().equals( "abcd" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcd2 = pm.obtainLCA( p1.getNode( "D" ), p1.getNode( "A" ) );
+            if ( !abcd2.getName().equals( "abcd" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcdef = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "F" ) );
+            if ( !abcdef.getName().equals( "abcdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcdef2 = pm.obtainLCA( p1.getNode( "F" ), p1.getNode( "A" ) );
+            if ( !abcdef2.getName().equals( "abcdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcdef3 = pm.obtainLCA( p1.getNode( "ab" ), p1.getNode( "F" ) );
+            if ( !abcdef3.getName().equals( "abcdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcdef4 = pm.obtainLCA( p1.getNode( "F" ), p1.getNode( "ab" ) );
+            if ( !abcdef4.getName().equals( "abcdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcde = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "E" ) );
+            if ( !abcde.getName().equals( "abcde" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcde2 = pm.obtainLCA( p1.getNode( "E" ), p1.getNode( "A" ) );
+            if ( !abcde2.getName().equals( "abcde" ) ) {
+                return false;
+            }
+            final PhylogenyNode r = pm.obtainLCA( p1.getNode( "abcdefgh" ), p1.getNode( "abcdefgh" ) );
+            if ( !r.getName().equals( "abcdefgh" ) ) {
+                return false;
+            }
+            final PhylogenyNode r2 = pm.obtainLCA( p1.getNode( "A" ), p1.getNode( "H" ) );
+            if ( !r2.getName().equals( "abcdefgh" ) ) {
+                return false;
+            }
+            final PhylogenyNode r3 = pm.obtainLCA( p1.getNode( "H" ), p1.getNode( "A" ) );
+            if ( !r3.getName().equals( "abcdefgh" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcde3 = pm.obtainLCA( p1.getNode( "E" ), p1.getNode( "abcde" ) );
+            if ( !abcde3.getName().equals( "abcde" ) ) {
+                return false;
+            }
+            final PhylogenyNode abcde4 = pm.obtainLCA( p1.getNode( "abcde" ), p1.getNode( "E" ) );
+            if ( !abcde4.getName().equals( "abcde" ) ) {
+                return false;
+            }
+            final PhylogenyNode ab3 = pm.obtainLCA( p1.getNode( "ab" ), p1.getNode( "B" ) );
+            if ( !ab3.getName().equals( "ab" ) ) {
+                return false;
+            }
+            final PhylogenyNode ab4 = pm.obtainLCA( p1.getNode( "B" ), p1.getNode( "ab" ) );
+            if ( !ab4.getName().equals( "ab" ) ) {
+                return false;
+            }
+            final Phylogeny p2 = factory.create( "(a,b,(((c,d)cd,e)cde,f)cdef)r", new NHXParser() )[ 0 ];
+            final PhylogenyNode cd = pm.obtainLCA( p2.getNode( "c" ), p2.getNode( "d" ) );
+            if ( !cd.getName().equals( "cd" ) ) {
+                return false;
+            }
+            final PhylogenyNode cd2 = pm.obtainLCA( p2.getNode( "d" ), p2.getNode( "c" ) );
+            if ( !cd2.getName().equals( "cd" ) ) {
+                return false;
+            }
+            final PhylogenyNode cde = pm.obtainLCA( p2.getNode( "c" ), p2.getNode( "e" ) );
+            if ( !cde.getName().equals( "cde" ) ) {
+                return false;
+            }
+            final PhylogenyNode cde2 = pm.obtainLCA( p2.getNode( "e" ), p2.getNode( "c" ) );
+            if ( !cde2.getName().equals( "cde" ) ) {
+                return false;
+            }
+            final PhylogenyNode cdef = pm.obtainLCA( p2.getNode( "c" ), p2.getNode( "f" ) );
+            if ( !cdef.getName().equals( "cdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode cdef2 = pm.obtainLCA( p2.getNode( "d" ), p2.getNode( "f" ) );
+            if ( !cdef2.getName().equals( "cdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode cdef3 = pm.obtainLCA( p2.getNode( "f" ), p2.getNode( "d" ) );
+            if ( !cdef3.getName().equals( "cdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode rt = pm.obtainLCA( p2.getNode( "c" ), p2.getNode( "a" ) );
+            if ( !rt.getName().equals( "r" ) ) {
+                return false;
+            }
+            final Phylogeny p3 = factory
+                    .create( "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)",
+                             new NHXParser() )[ 0 ];
+            final PhylogenyNode bc_3 = pm.obtainLCA( p3.getNode( "b" ), p3.getNode( "c" ) );
+            if ( !bc_3.getName().equals( "bc" ) ) {
+                return false;
+            }
+            final PhylogenyNode ac_3 = pm.obtainLCA( p3.getNode( "a" ), p3.getNode( "c" ) );
+            if ( !ac_3.getName().equals( "abc" ) ) {
+                return false;
+            }
+            final PhylogenyNode ad_3 = pm.obtainLCA( p3.getNode( "a" ), p3.getNode( "d" ) );
+            if ( !ad_3.getName().equals( "abcde" ) ) {
+                return false;
+            }
+            final PhylogenyNode af_3 = pm.obtainLCA( p3.getNode( "a" ), p3.getNode( "f" ) );
+            if ( !af_3.getName().equals( "abcdef" ) ) {
+                return false;
+            }
+            final PhylogenyNode ag_3 = pm.obtainLCA( p3.getNode( "a" ), p3.getNode( "g" ) );
+            if ( !ag_3.getName().equals( "" ) ) {
+                return false;
+            }
+            if ( !ag_3.isRoot() ) {
+                return false;
+            }
+            final PhylogenyNode al_3 = pm.obtainLCA( p3.getNode( "a" ), p3.getNode( "l" ) );
+            if ( !al_3.getName().equals( "" ) ) {
+                return false;
+            }
+            if ( !al_3.isRoot() ) {
+                return false;
+            }
+            final PhylogenyNode kl_3 = pm.obtainLCA( p3.getNode( "k" ), p3.getNode( "l" ) );
+            if ( !kl_3.getName().equals( "" ) ) {
+                return false;
+            }
+            if ( !kl_3.isRoot() ) {
+                return false;
+            }
+            final PhylogenyNode fl_3 = pm.obtainLCA( p3.getNode( "f" ), p3.getNode( "l" ) );
+            if ( !fl_3.getName().equals( "" ) ) {
+                return false;
+            }
+            if ( !fl_3.isRoot() ) {
+                return false;
+            }
+            final PhylogenyNode gk_3 = pm.obtainLCA( p3.getNode( "g" ), p3.getNode( "k" ) );
+            if ( !gk_3.getName().equals( "ghijk" ) ) {
+                return false;
+            }
+            final Phylogeny p4 = factory.create( "(a,b,c)r", new NHXParser() )[ 0 ];
+            final PhylogenyNode r_4 = pm.obtainLCA( p4.getNode( "b" ), p4.getNode( "c" ) );
+            if ( !r_4.getName().equals( "r" ) ) {
+                return false;
+            }
+            final Phylogeny p5 = factory.create( "((a,b),c,d)root", new NHXParser() )[ 0 ];
+            final PhylogenyNode r_5 = pm.obtainLCA( p5.getNode( "a" ), p5.getNode( "c" ) );
+            if ( !r_5.getName().equals( "root" ) ) {
+                return false;
+            }
+            final Phylogeny p6 = factory.create( "((a,b),c,d)rot", new NHXParser() )[ 0 ];
+            final PhylogenyNode r_6 = pm.obtainLCA( p6.getNode( "c" ), p6.getNode( "a" ) );
+            if ( !r_6.getName().equals( "rot" ) ) {
+                return false;
+            }
+            final Phylogeny p7 = factory.create( "(((a,b)x,c)x,d,e)rott", new NHXParser() )[ 0 ];
+            final PhylogenyNode r_7 = pm.obtainLCA( p7.getNode( "a" ), p7.getNode( "e" ) );
+            if ( !r_7.getName().equals( "rott" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
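+    // Parses two hmmscan per-domain table files (apparently hmmscan 3.0b3 output)
+    // from the test data directory, then checks protein and domain counts as well as
+    // the coordinates, e-values and scores of the single DNA_pol_B_new domain of the
+    // fourth protein.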
+    private static boolean testHmmscanOutputParser() {
+        final String test_dir = Test.PATH_TO_TEST_DATA;
+        try {
+            final HmmscanPerDomainTableParser parser1 = new HmmscanPerDomainTableParser( new File( test_dir
+                    + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_1" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE );
+            parser1.parse();
+            final HmmscanPerDomainTableParser parser2 = new HmmscanPerDomainTableParser( new File( test_dir
+                    + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_2" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE );
+            final List<Protein> domain_collections = parser2.parse();
+            if ( parser2.getProteinsEncountered() != 4 ) {
+                return false;
+            }
+            if ( domain_collections.size() != 4 ) {
+                return false;
+            }
+            if ( parser2.getDomainsEncountered() != 69 ) {
+                return false;
+            }
+            if ( parser2.getDomainsIgnoredDueToDuf() != 0 ) {
+                return false;
+            }
+            if ( parser2.getDomainsIgnoredDueToEval() != 0 ) {
+                return false;
+            }
+            final Protein p1 = domain_collections.get( 0 );
+            if ( p1.getNumberOfProteinDomains() != 15 ) {
+                return false;
+            }
+            final Protein p4 = domain_collections.get( 3 );
+            if ( p4.getNumberOfProteinDomains() != 1 ) {
+                return false;
+            }
+            if ( !p4.getProteinDomain( 0 ).getDomainId().toString().equals( "DNA_pol_B_new" ) ) {
+                return false;
+            }
+            if ( p4.getProteinDomain( 0 ).getFrom() != 51 ) {
+                return false;
+            }
+            if ( p4.getProteinDomain( 0 ).getTo() != 395 ) {
+                return false;
+            }
+            if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerDomainEvalue(), 1.2e-39 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerDomainScore(), 135.7 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerSequenceEvalue(), 8.3e-40 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerSequenceScore(), 136.3 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( p4.getProteinDomain( 0 ).getNumber(), 1 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( p4.getProteinDomain( 0 ).getTotalCount(), 1 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
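+    // Verifies isLastExternalNode(): in ((A,B),(C,D)) only the rightmost leaf, D,
+    // should report being the last external node.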
+    private static boolean testLastExternalNodeMethods() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final char[] a0 = { '(', '(', 'A', ',', 'B', ')', ',', '(', 'C', ',', 'D', ')', ')', };
+            final Phylogeny t0 = factory.create( a0, new NHXParser() )[ 0 ];
+            final PhylogenyNode n1 = t0.getNode( "A" );
+            if ( n1.isLastExternalNode() ) {
+                return false;
+            }
+            final PhylogenyNode n2 = t0.getNode( "B" );
+            if ( n2.isLastExternalNode() ) {
+                return false;
+            }
+            final PhylogenyNode n3 = t0.getNode( "C" );
+            if ( n3.isLastExternalNode() ) {
+                return false;
+            }
+            final PhylogenyNode n4 = t0.getNode( "D" );
+            if ( !n4.isLastExternalNode() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
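+    // Checks iteratorLevelOrder() (breadth-first traversal): nodes are expected in
+    // order of increasing depth, root first and leaves last; also verifies that
+    // reset() allows an iterator to be reused and that a single-node tree works.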
+    private static boolean testLevelOrderIterator() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0 = factory.create( "((A,B)ab,(C,D)cd)r", new NHXParser() )[ 0 ];
+            PhylogenyNodeIterator it0;
+            for( it0 = t0.iteratorLevelOrder(); it0.hasNext(); ) {
+                it0.next();
+            }
+            for( it0.reset(); it0.hasNext(); ) {
+                it0.next();
+            }
+            final PhylogenyNodeIterator it = t0.iteratorLevelOrder();
+            if ( !it.next().getName().equals( "r" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "ab" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "cd" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "D" ) ) {
+                return false;
+            }
+            if ( it.hasNext() ) {
+                return false;
+            }
+            final Phylogeny t2 = factory.create( "(((1,2,(a,(X,Y,Z)b)3,4,5,6)A,B,C)abc,(D,E,(f1,(f21)f2,f3)F,G)defg)r",
+                                                 new NHXParser() )[ 0 ];
+            PhylogenyNodeIterator it2;
+            for( it2 = t2.iteratorLevelOrder(); it2.hasNext(); ) {
+                it2.next();
+            }
+            for( it2.reset(); it2.hasNext(); ) {
+                it2.next();
+            }
+            final PhylogenyNodeIterator it3 = t2.iteratorLevelOrder();
+            if ( !it3.next().getName().equals( "r" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "abc" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "defg" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "D" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "E" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "F" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "G" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "1" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "2" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "3" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "4" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "5" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "6" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "f1" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "f2" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "f3" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "a" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "b" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "f21" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "X" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "Y" ) ) {
+                return false;
+            }
+            if ( !it3.next().getName().equals( "Z" ) ) {
+                return false;
+            }
+            if ( it3.hasNext() ) {
+                return false;
+            }
+            final Phylogeny t4 = factory.create( "((((D)C)B)A)r", new NHXParser() )[ 0 ];
+            PhylogenyNodeIterator it4;
+            for( it4 = t4.iteratorLevelOrder(); it4.hasNext(); ) {
+                it4.next();
+            }
+            for( it4.reset(); it4.hasNext(); ) {
+                it4.next();
+            }
+            final PhylogenyNodeIterator it5 = t4.iteratorLevelOrder();
+            if ( !it5.next().getName().equals( "r" ) ) {
+                return false;
+            }
+            if ( !it5.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it5.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it5.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !it5.next().getName().equals( "D" ) ) {
+                return false;
+            }
+            final Phylogeny t5 = factory.create( "A", new NHXParser() )[ 0 ];
+            PhylogenyNodeIterator it6;
+            for( it6 = t5.iteratorLevelOrder(); it6.hasNext(); ) {
+                it6.next();
+            }
+            for( it6.reset(); it6.hasNext(); ) {
+                it6.next();
+            }
+            final PhylogenyNodeIterator it7 = t5.iteratorLevelOrder();
+            if ( !it7.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( it7.hasNext() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
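+    // Checks PhylogenyMethods.midpointRoot(), also after first re-rooting at A: the
+    // longest leaf-to-leaf path is B to D (2 + 1 + 3 + 4 = 10), so after midpoint
+    // rooting both B and D should be 5 away from the root, giving the branch lengths
+    // asserted below (AB: 3, CD: 1).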
+    private static boolean testMidpointrooting() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t1 = factory.create( "((A:1,B:2)AB:1[&&NHX:B=55],(C:3,D:4)CD:3[&&NHX:B=10])ABCD:0.5",
+                                                 new NHXParser() )[ 0 ];
+            if ( !t1.isRooted() ) {
+                return false;
+            }
+            PhylogenyMethods.midpointRoot( t1 );
+            if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 2 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 3 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "D" ).getDistanceToParent(), 4 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "CD" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "AB" ).getDistanceToParent(), 3 ) ) {
+                return false;
+            }
+            t1.reRoot( t1.getNode( "A" ) );
+            PhylogenyMethods.midpointRoot( t1 );
+            if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 2 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 3 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "D" ).getDistanceToParent(), 4 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "CD" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "AB" ).getDistanceToParent(), 3 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
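+    // Parses character (state) labels from two Nexus test files and expects both to
+    // yield the same seven labels, "14-3-3" through "3_5_exonuc".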
+    private static boolean testNexusCharactersParsing() {
+        try {
+            final NexusCharactersParser parser = new NexusCharactersParser();
+            parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_7.nex" ) );
+            parser.parse();
+            String[] labels = parser.getCharStateLabels();
+            if ( labels.length != 7 ) {
+                return false;
+            }
+            if ( !labels[ 0 ].equals( "14-3-3" ) ) {
+                return false;
+            }
+            if ( !labels[ 1 ].equals( "2-Hacid_dh" ) ) {
+                return false;
+            }
+            if ( !labels[ 2 ].equals( "2-Hacid_dh_C" ) ) {
+                return false;
+            }
+            if ( !labels[ 3 ].equals( "2-oxoacid_dh" ) ) {
+                return false;
+            }
+            if ( !labels[ 4 ].equals( "2OG-FeII_Oxy" ) ) {
+                return false;
+            }
+            if ( !labels[ 5 ].equals( "3-HAO" ) ) {
+                return false;
+            }
+            if ( !labels[ 6 ].equals( "3_5_exonuc" ) ) {
+                return false;
+            }
+            parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_8.nex" ) );
+            parser.parse();
+            labels = parser.getCharStateLabels();
+            if ( labels.length != 7 ) {
+                return false;
+            }
+            if ( !labels[ 0 ].equals( "14-3-3" ) ) {
+                return false;
+            }
+            if ( !labels[ 1 ].equals( "2-Hacid_dh" ) ) {
+                return false;
+            }
+            if ( !labels[ 2 ].equals( "2-Hacid_dh_C" ) ) {
+                return false;
+            }
+            if ( !labels[ 3 ].equals( "2-oxoacid_dh" ) ) {
+                return false;
+            }
+            if ( !labels[ 4 ].equals( "2OG-FeII_Oxy" ) ) {
+                return false;
+            }
+            if ( !labels[ 5 ].equals( "3-HAO" ) ) {
+                return false;
+            }
+            if ( !labels[ 6 ].equals( "3_5_exonuc" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
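+    // Parses a binary character-state matrix from a Nexus test file and spot-checks
+    // its dimensions, a few PRESENT/ABSENT states, and the row identifiers.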
+    private static boolean testNexusMatrixParsing() {
+        try {
+            final NexusBinaryStatesMatrixParser parser = new NexusBinaryStatesMatrixParser();
+            parser.setSource( new File( Test.PATH_TO_TEST_DATA + "nexus_test_9.nex" ) );
+            parser.parse();
+            final CharacterStateMatrix<BinaryStates> m = parser.getMatrix();
+            if ( m.getNumberOfCharacters() != 9 ) {
+                return false;
+            }
+            if ( m.getNumberOfIdentifiers() != 5 ) {
+                return false;
+            }
+            if ( m.getState( 0, 0 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( m.getState( 0, 1 ) != BinaryStates.ABSENT ) {
+                return false;
+            }
+            if ( m.getState( 1, 0 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( m.getState( 2, 0 ) != BinaryStates.ABSENT ) {
+                return false;
+            }
+            if ( m.getState( 4, 8 ) != BinaryStates.PRESENT ) {
+                return false;
+            }
+            if ( !m.getIdentifier( 0 ).equals( "MOUSE" ) ) {
+                return false;
+            }
+            if ( !m.getIdentifier( 4 ).equals( "ARATH" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
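+    // Parses trees from several Nexus test files and checks the number of phylogenies
+    // returned, their names, their external node counts, and whether they are rooted.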
+    private static boolean testNexusTreeParsing() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final NexusPhylogeniesParser parser = new NexusPhylogeniesParser();
+            Phylogeny[] phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_1.nex", parser );
+            if ( phylogenies.length != 1 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 25 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "" ) ) {
+                return false;
+            }
+            phylogenies = null;
+            phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_2.nex", parser );
+            if ( phylogenies.length != 1 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "name" ) ) {
+                return false;
+            }
+            phylogenies = null;
+            phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_3.nex", parser );
+            if ( phylogenies.length != 1 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].isRooted() ) {
+                return false;
+            }
+            phylogenies = null;
+            phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_4.nex", parser );
+            if ( phylogenies.length != 18 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "tree 0" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getName().equals( "tree 1" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 1 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( phylogenies[ 2 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 3 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 4 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 5 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 6 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 7 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 8 ].getName().equals( "tree 8" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 8 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 8 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 9 ].getName().equals( "tree 9" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 9 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 9 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 10 ].getName().equals( "tree 10" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 10 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 10 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 11 ].getName().equals( "tree 11" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 11 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 11 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 12 ].getName().equals( "tree 12" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 12 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 12 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 13 ].getName().equals( "tree 13" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 13 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 13 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 14 ].getName().equals( "tree 14" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 14 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 14 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( !phylogenies[ 15 ].getName().equals( "tree 15" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 15 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 15 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( !phylogenies[ 16 ].getName().equals( "tree 16" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 16 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 16 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+            if ( !phylogenies[ 17 ].getName().equals( "tree 17" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 17 ].isRooted() ) {
+                return false;
+            }
+            if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
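+    // Parses Nexus test files exercising taxon-label translation: external node names
+    // are expected to come back as the full names (Scarabaeus, Drosophila, Aranaeus),
+    // and the rooted/unrooted status of each tree is checked as asserted below.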
+    private static boolean testNexusTreeParsingTranslating() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final NexusPhylogeniesParser parser = new NexusPhylogeniesParser();
+            Phylogeny[] phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_5.nex", parser );
+            if ( phylogenies.length != 1 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "Tree0" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+            phylogenies = null;
+            phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_6.nex", parser );
+            if ( phylogenies.length != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "Tree0" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].isRooted() ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 1 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getName().equals( "Tree1" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 1 ].isRooted() ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 2 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getName().equals( "Tree2" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].isRooted() ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+            phylogenies = null;
+            phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "nexus_test_7.nex", parser );
+            if ( phylogenies.length != 3 ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getName().equals( "Tree0" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 0 ].isRooted() ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 0 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 1 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getName().equals( "Tree1" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 1 ].isRooted() ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 1 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+            if ( phylogenies[ 2 ].getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getName().equals( "Tree2" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].isRooted() ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getFirstExternalNode().getName().equals( "Scarabaeus" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getName().equals( "Drosophila" ) ) {
+                return false;
+            }
+            if ( !phylogenies[ 2 ].getFirstExternalNode().getNextExternalNode().getNextExternalNode().getName()
+                    .equals( "Aranaeus" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
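+    // Exercises NHXParser on plain New Hampshire input from Strings, StringBuffers,
+    // char arrays, and files: single and multiple trees, embedded whitespace,
+    // deeply nested clades, branch lengths, and empty input.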
+    private static boolean testNHParsing() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p1 = factory.create( "(A,B1)", new NHXParser() )[ 0 ];
+            if ( !p1.toNewHampshireX().equals( "(A,B1)" ) ) {
+                return false;
+            }
+            final NHXParser nhxp = new NHXParser();
+            nhxp.setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO );
+            nhxp.setReplaceUnderscores( true );
+            final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ];
+            if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) {
+                return false;
+            }
+            if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( " B B" ) ) {
+                return false;
+            }
+            final Phylogeny p1b = factory
+                    .create( "   \n  \t  \b   \r \f   ; (  \n  \t  \b   \r \f; A ;  \n  \t  \b   \r \f,  \n  \t  \b   \r \f; B ;   \n  \t  \b   \r \f 1  \n  \t  \b   \r \f ;  \n  \t  \b   \r \f );;;;; \n  \t  \b   \r \f;;;  \n  \t  \b   \r \f ",
+                             new NHXParser() )[ 0 ];
+            if ( !p1b.toNewHampshireX().equals( "(';A;',';B;1;')" ) ) {
+                return false;
+            }
+            if ( !p1b.toNewHampshire().equals( "(';A;',';B;1;');" ) ) {
+                return false;
+            }
+            final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser() )[ 0 ];
+            final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser() )[ 0 ];
+            final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser() )[ 0 ];
+            final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser() )[ 0 ];
+            final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser() );
+            final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new NHXParser() );
+            final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser() );
+            final Phylogeny[] p10 = factory.create( "(A,B10);(C,D10);", new NHXParser() );
+            final Phylogeny[] p11 = factory.create( "(A,B11);(C,D11) (E,F11)\t(G,H11)", new NHXParser() );
+            final Phylogeny[] p12 = factory.create( "(A,B12) (C,D12) (E,F12) (G,H12)", new NHXParser() );
+            final Phylogeny[] p13 = factory.create( " ; (;A; , ; B ; 1  3 ; \n)\t ( \n ;"
+                                                            + " C ; ,; D;13;);;;;;;(;E;,;F;13 ;) ; "
+                                                            + "; ; ( \t\n\r\b; G ;, ;H ;1 3; )  ;  ;   ;",
+                                                    new NHXParser() );
+            if ( !p13[ 0 ].toNewHampshireX().equals( "(';A;',';B;13;')" ) ) {
+                return false;
+            }
+            if ( !p13[ 1 ].toNewHampshireX().equals( "(';C;',';D;13;')" ) ) {
+                return false;
+            }
+            if ( !p13[ 2 ].toNewHampshireX().equals( "(';E;',';F;13;')" ) ) {
+                return false;
+            }
+            if ( !p13[ 3 ].toNewHampshireX().equals( "(';G;',';H;13;')" ) ) {
+                return false;
+            }
+            final Phylogeny[] p14 = factory.create( "(A,B14)ab", new NHXParser() );
+            final Phylogeny[] p15 = factory.create( "(A,B15)ab;", new NHXParser() );
+            final String p16_S = "((A,B),C)";
+            final Phylogeny[] p16 = factory.create( p16_S, new NHXParser() );
+            if ( !p16[ 0 ].toNewHampshireX().equals( p16_S ) ) {
+                return false;
+            }
+            final String p17_S = "(C,(A,B))";
+            final Phylogeny[] p17 = factory.create( p17_S, new NHXParser() );
+            if ( !p17[ 0 ].toNewHampshireX().equals( p17_S ) ) {
+                return false;
+            }
+            final String p18_S = "((A,B),(C,D))";
+            final Phylogeny[] p18 = factory.create( p18_S, new NHXParser() );
+            if ( !p18[ 0 ].toNewHampshireX().equals( p18_S ) ) {
+                return false;
+            }
+            final String p19_S = "(((A,B),C),D)";
+            final Phylogeny[] p19 = factory.create( p19_S, new NHXParser() );
+            if ( !p19[ 0 ].toNewHampshireX().equals( p19_S ) ) {
+                return false;
+            }
+            final String p20_S = "(A,(B,(C,D)))";
+            final Phylogeny[] p20 = factory.create( p20_S, new NHXParser() );
+            if ( !p20[ 0 ].toNewHampshireX().equals( p20_S ) ) {
+                return false;
+            }
+            final String p21_S = "(A,(B,(C,(D,E))))";
+            final Phylogeny[] p21 = factory.create( p21_S, new NHXParser() );
+            if ( !p21[ 0 ].toNewHampshireX().equals( p21_S ) ) {
+                return false;
+            }
+            final String p22_S = "((((A,B),C),D),E)";
+            final Phylogeny[] p22 = factory.create( p22_S, new NHXParser() );
+            if ( !p22[ 0 ].toNewHampshireX().equals( p22_S ) ) {
+                return false;
+            }
+            final String p23_S = "(A,(B,(C,(D,E)de)cde)bcde)abcde";
+            final Phylogeny[] p23 = factory.create( p23_S, new NHXParser() );
+            if ( !p23[ 0 ].toNewHampshireX().equals( p23_S ) ) {
+                return false;
+            }
+            final String p24_S = "((((A,B)ab,C)abc,D)abcd,E)abcde";
+            final Phylogeny[] p24 = factory.create( p24_S, new NHXParser() );
+            if ( !p24[ 0 ].toNewHampshireX().equals( p24_S ) ) {
+                return false;
+            }
+            final String p241_S1 = "(A,(B,(C,(D,E)de)cde)bcde)abcde";
+            final String p241_S2 = "((((A,B)ab,C)abc,D)abcd,E)abcde";
+            final Phylogeny[] p241 = factory.create( p241_S1 + p241_S2, new NHXParser() );
+            if ( !p241[ 0 ].toNewHampshireX().equals( p241_S1 ) ) {
+                return false;
+            }
+            if ( !p241[ 1 ].toNewHampshireX().equals( p241_S2 ) ) {
+                return false;
+            }
+            final String p25_S = "((((((((((((((A,B)ab,C)abc,D)abcd,E)"
+                    + "abcde,(B,(C,(D,E)de)cde)bcde)abcde,(B,((A,(B,(C,(D,"
+                    + "E)de)cde)bcde)abcde,(D,E)de)cde)bcde)abcde,B)ab,C)"
+                    + "abc,((((A,B)ab,C)abc,D)abcd,E)abcde)abcd,E)abcde,"
+                    + "((((A,((((((((A,B)ab,C)abc,((((A,B)ab,C)abc,D)abcd,"
+                    + "E)abcde)abcd,E)abcde,((((A,B)ab,C)abc,D)abcd,E)abcde)"
+                    + "ab,C)abc,((((A,B)ab,C)abc,D)abcd,E)abcde)abcd,E)abcde"
+                    + ")ab,C)abc,D)abcd,E)abcde)ab,C)abc,((((A,B)ab,C)abc,D)" + "abcd,E)abcde)abcd,E)abcde";
+            final Phylogeny[] p25 = factory.create( p25_S, new NHXParser() );
+            if ( !p25[ 0 ].toNewHampshireX().equals( p25_S ) ) {
+                return false;
+            }
+            final String p26_S = "(A,B)ab";
+            final Phylogeny[] p26 = factory.create( p26_S, new NHXParser() );
+            if ( !p26[ 0 ].toNewHampshireX().equals( p26_S ) ) {
+                return false;
+            }
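+            // Parsing from test data files (phylogeny27.nhx, phylogeny28.nhx):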
+            final String p27_S = "((((A,B)ab,C)abc,D)abcd,E)abcde";
+            final Phylogeny[] p27 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phylogeny27.nhx" ),
+                                                    new NHXParser() );
+            if ( !p27[ 0 ].toNewHampshireX().equals( p27_S ) ) {
+                return false;
+            }
+            final String p28_S1 = "((((A,B)ab,C)abc,D)abcd,E)abcde";
+            final String p28_S2 = "(A,(B,(C,(D,E)de)cde)bcde)abcde";
+            final String p28_S3 = "(A,B)ab";
+            final String p28_S4 = "((((A,B),C),D),;E;)";
+            final Phylogeny[] p28 = factory.create( new File( Test.PATH_TO_TEST_DATA + "phylogeny28.nhx" ),
+                                                    new NHXParser() );
+            if ( !p28[ 0 ].toNewHampshireX().equals( p28_S1 ) ) {
+                return false;
+            }
+            if ( !p28[ 1 ].toNewHampshireX().equals( p28_S2 ) ) {
+                return false;
+            }
+            if ( !p28[ 2 ].toNewHampshireX().equals( p28_S3 ) ) {
+                return false;
+            }
+            if ( !p28[ 3 ].toNewHampshireX().equals( "((((A,B),C),D),';E;')" ) ) {
+                return false;
+            }
+            final String p29_S = "((((A:0.01,B:0.684)ab:0.345,C:0.3451)abc:0.3451,D:1.5)abcd:0.134,E:0.32)abcde:0.1345";
+            final Phylogeny[] p29 = factory.create( p29_S, new NHXParser() );
+            if ( !p29[ 0 ].toNewHampshireX().equals( p29_S ) ) {
+                return false;
+            }
+            final String p30_S = "((((A:0.01,B:0.02):0.93,C:0.04):0.05,D:1.4):0.06,E):0.72";
+            final Phylogeny[] p30 = factory.create( p30_S, new NHXParser() );
+            if ( !p30[ 0 ].toNewHampshireX().equals( p30_S ) ) {
+                return false;
+            }
+            final String p32_S = " ;   ;       \n  \t  \b   \f  \r  ;;;;;; ";
+            final Phylogeny[] p32 = factory.create( p32_S, new NHXParser() );
+            if ( ( p32.length != 1 ) || !p32[ 0 ].isEmpty() ) {
+                return false;
+            }
+            final String p33_S = "A";
+            final Phylogeny[] p33 = factory.create( p33_S, new NHXParser() );
+            if ( !p33[ 0 ].toNewHampshireX().equals( p33_S ) ) {
+                return false;
+            }
+            final String p34_S = "B;";
+            final Phylogeny[] p34 = factory.create( p34_S, new NHXParser() );
+            if ( !p34[ 0 ].toNewHampshireX().equals( "B" ) ) {
+                return false;
+            }
+            final String p35_S = "B:0.2";
+            final Phylogeny[] p35 = factory.create( p35_S, new NHXParser() );
+            if ( !p35[ 0 ].toNewHampshireX().equals( p35_S ) ) {
+                return false;
+            }
+            final String p36_S = "(A)";
+            final Phylogeny[] p36 = factory.create( p36_S, new NHXParser() );
+            if ( !p36[ 0 ].toNewHampshireX().equals( p36_S ) ) {
+                return false;
+            }
+            final String p37_S = "((A))";
+            final Phylogeny[] p37 = factory.create( p37_S, new NHXParser() );
+            if ( !p37[ 0 ].toNewHampshireX().equals( p37_S ) ) {
+                return false;
+            }
+            final String p38_S = "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8";
+            final Phylogeny[] p38 = factory.create( p38_S, new NHXParser() );
+            if ( !p38[ 0 ].toNewHampshireX().equals( p38_S ) ) {
+                return false;
+            }
+            final String p39_S = "(((B,((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8";
+            final Phylogeny[] p39 = factory.create( p39_S, new NHXParser() );
+            if ( !p39[ 0 ].toNewHampshireX().equals( p39_S ) ) {
+                return false;
+            }
+            final String p40_S = "(A,B,C)";
+            final Phylogeny[] p40 = factory.create( p40_S, new NHXParser() );
+            if ( !p40[ 0 ].toNewHampshireX().equals( p40_S ) ) {
+                return false;
+            }
+            final String p41_S = "(A,B,C,D,E,F,G,H,I,J,K)";
+            final Phylogeny[] p41 = factory.create( p41_S, new NHXParser() );
+            if ( !p41[ 0 ].toNewHampshireX().equals( p41_S ) ) {
+                return false;
+            }
+            final String p42_S = "(A,B,(X,Y,Z),D,E,F,G,H,I,J,K)";
+            final Phylogeny[] p42 = factory.create( p42_S, new NHXParser() );
+            if ( !p42[ 0 ].toNewHampshireX().equals( p42_S ) ) {
+                return false;
+            }
+            final String p43_S = "(A,B,C,(AA,BB,CC,(CCC,DDD,EEE,(FFFF,GGGG)x)y,DD,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)";
+            final Phylogeny[] p43 = factory.create( p43_S, new NHXParser() );
+            if ( !p43[ 0 ].toNewHampshireX().equals( p43_S ) ) {
+                return false;
+            }
+            final String p44_S = "(((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))";
+            final Phylogeny[] p44 = factory.create( p44_S, new NHXParser() );
+            if ( !p44[ 0 ].toNewHampshireX().equals( p44_S ) ) {
+                return false;
+            }
+            final String p45_S = "((((((((((A))))))))),(((((((((B))))))))),(((((((((C))))))))))";
+            final Phylogeny[] p45 = factory.create( p45_S, new NHXParser() );
+            if ( !p45[ 0 ].toNewHampshireX().equals( p45_S ) ) {
+                return false;
+            }
+            final String p46_S = "";
+            final Phylogeny[] p46 = factory.create( p46_S, new NHXParser() );
+            if ( ( p46.length != 1 ) || !p46[ 0 ].isEmpty() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
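+    // Checks that nodes built from NHX strings are written back correctly by
+    // toNewHampshireX().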
+    private static boolean testNHXconversion() {
+        try {
+            final PhylogenyNode n1 = new PhylogenyNode();
+            final PhylogenyNode n2 = new PhylogenyNode( "" );
+            final PhylogenyNode n3 = new PhylogenyNode( "n3" );
+            final PhylogenyNode n4 = new PhylogenyNode( "n4:0.01" );
+            final PhylogenyNode n5 = new PhylogenyNode( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1:W=2:C=10.20.30:XN=S=tag1=value1=unit1]" );
+            final PhylogenyNode n6 = new PhylogenyNode( "n6:0.000001[&&NHX:S=Ecoli:E=1.1.1.1:D=N:Co=N:B=100:T=1:W=2:C=0.0.0:XN=B=bool_tag=T]" );
+            if ( !n1.toNewHampshireX().equals( "" ) ) {
+                return false;
+            }
+            if ( !n2.toNewHampshireX().equals( "" ) ) {
+                return false;
+            }
+            if ( !n3.toNewHampshireX().equals( "n3" ) ) {
+                return false;
+            }
+            if ( !n4.toNewHampshireX().equals( "n4:0.01" ) ) {
+                return false;
+            }
+            if ( !n5.toNewHampshireX()
+                    .equals( "n5:0.1[&&NHX:T=1:S=Ecoli:D=Y:XN=S=tag1=value1=unit1:B=56.0:W=2.0:C=10.20.30]" ) ) {
+                return false;
+            }
+            if ( !n6.toNewHampshireX()
+                    .equals( "n6:1.0E-6[&&NHX:T=1:S=Ecoli:D=N:XN=B=bool_tag=T:B=100.0:W=2.0:C=0.0.0]" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
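+    // Node-level NHX parsing: names, distances, duplication events, confidence,
+    // branch width, properties, and PFAM-style taxonomy extraction from node names.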
+    private static boolean testNHXNodeParsing() {
+        try {
+            final PhylogenyNode n1 = new PhylogenyNode();
+            final PhylogenyNode n2 = new PhylogenyNode( "" );
+            final PhylogenyNode n3 = new PhylogenyNode( "n3" );
+            final PhylogenyNode n4 = new PhylogenyNode( "n4:0.01" );
+            final PhylogenyNode n5 = new PhylogenyNode( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:B=56:T=1:On=22:SOn=33:SNn=44:W=2:C=10.20.30:XN=S=tag1=value1=unit1:XN=S=tag3=value3=unit3]" );
+            if ( !n3.getName().equals( "n3" ) ) {
+                return false;
+            }
+            if ( n3.getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+                return false;
+            }
+            if ( n3.isDuplication() ) {
+                return false;
+            }
+            if ( n3.isHasAssignedEvent() ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getBranchWidthValue( n3 ) != BranchWidth.BRANCH_WIDTH_DEFAULT_VALUE ) {
+                return false;
+            }
+            if ( !n4.getName().equals( "n4" ) ) {
+                return false;
+            }
+            if ( n4.getDistanceToParent() != 0.01 ) {
+                return false;
+            }
+            if ( !n5.getName().equals( "n5" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( n5 ) != 56 ) {
+                return false;
+            }
+            if ( n5.getDistanceToParent() != 0.1 ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n5 ).equals( "Ecoli" ) ) {
+                return false;
+            }
+            if ( !n5.isDuplication() ) {
+                return false;
+            }
+            if ( !n5.isHasAssignedEvent() ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getBranchWidthValue( n5 ) != 2 ) {
+                return false;
+            }
+            if ( n5.getNodeData().getProperties().getPropertyRefs().length != 2 ) {
+                return false;
+            }
+            final PhylogenyNode n8 = new PhylogenyNode( "n8_ECOLI/12:0.01",
+                                                        ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n8.getName().equals( "n8_ECOLI/12" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n8 ).equals( "ECOLI" ) ) {
+                return false;
+            }
+            final PhylogenyNode n9 = new PhylogenyNode( "n9_ECOLI/12=12:0.01",
+                                                        ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n9.getName().equals( "n9_ECOLI/12=12" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n9 ).equals( "ECOLI" ) ) {
+                return false;
+            }
+            final PhylogenyNode n10 = new PhylogenyNode( "n10.ECOLI", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n10.getName().equals( "n10.ECOLI" ) ) {
+                return false;
+            }
+            final PhylogenyNode n20 = new PhylogenyNode( "n20_ECOLI/1-2",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n20.getName().equals( "n20_ECOLI/1-2" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n20 ).equals( "ECOLI" ) ) {
+                return false;
+            }
+            final PhylogenyNode n20x = new PhylogenyNode( "n20_ECOL1/1-2", ForesterUtil.TAXONOMY_EXTRACTION.YES );
+            if ( !n20x.getName().equals( "n20_ECOL1/1-2" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n20x ).equals( "ECOL1" ) ) {
+                return false;
+            }
+            final PhylogenyNode n20xx = new PhylogenyNode( "n20_eCOL1/1-2",
+                                                           ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n20xx.getName().equals( "n20_eCOL1/1-2" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n20xx ).length() > 0 ) {
+                return false;
+            }
+            final PhylogenyNode n20xxx = new PhylogenyNode( "n20_ecoli/1-2",
+                                                            ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n20xxx.getName().equals( "n20_ecoli/1-2" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n20xxx ).length() > 0 ) {
+                return false;
+            }
+            final PhylogenyNode n20xxxx = new PhylogenyNode( "n20_Ecoli/1-2",
+                                                             ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n20xxxx.getName().equals( "n20_Ecoli/1-2" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n20xxxx ).length() > 0 ) {
+                return false;
+            }
+            final PhylogenyNode n21 = new PhylogenyNode( "n21_PIG", ForesterUtil.TAXONOMY_EXTRACTION.YES );
+            if ( !n21.getName().equals( "n21_PIG" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n21 ).equals( "PIG" ) ) {
+                return false;
+            }
+            final PhylogenyNode n21x = new PhylogenyNode( "n21_PIG", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n21x.getName().equals( "n21_PIG" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n21x ).length() > 0 ) {
+                return false;
+            }
+            final PhylogenyNode n22 = new PhylogenyNode( "n22/PIG", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n22.getName().equals( "n22/PIG" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n22 ).length() > 0 ) {
+                return false;
+            }
+            final PhylogenyNode n23 = new PhylogenyNode( "n23/PIG_1", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n23.getName().equals( "n23/PIG_1" ) ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n23 ).length() > 0 ) {
+                return false;
+            }
+            if ( NHXParser.LIMIT_SPECIES_NAMES_TO_FIVE_CHARS ) {
+                final PhylogenyNode a = new PhylogenyNode( "n10_ECOLI/1-2",
+                                                           ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !a.getName().equals( "n10_ECOLI/1-2" ) ) {
+                    return false;
+                }
+                if ( !PhylogenyMethods.getSpecies( a ).equals( "ECOLI" ) ) {
+                    return false;
+                }
+                final PhylogenyNode b = new PhylogenyNode( "n10_ECOLI1/1-2",
+                                                           ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !b.getName().equals( "n10_ECOLI1/1-2" ) ) {
+                    return false;
+                }
+                if ( !PhylogenyMethods.getSpecies( b ).equals( "ECOLI" ) ) {
+                    return false;
+                }
+                final PhylogenyNode c = new PhylogenyNode( "n10_RATAF12/1000-2000",
+                                                           ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !c.getName().equals( "n10_RATAF12/1000-2000" ) ) {
+                    return false;
+                }
+                if ( !PhylogenyMethods.getSpecies( c ).equals( "RATAF" ) ) {
+                    return false;
+                }
+                final PhylogenyNode d = new PhylogenyNode( "n10_RAT1/1-2",
+                                                           ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !d.getName().equals( "n10_RAT1/1-2" ) ) {
+                    return false;
+                }
+                if ( !PhylogenyMethods.getSpecies( d ).equals( "RAT" ) ) {
+                    return false;
+                }
+                final PhylogenyNode e = new PhylogenyNode( "n10_RAT1", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !e.getName().equals( "n10_RAT1" ) ) {
+                    return false;
+                }
+                if ( !ForesterUtil.isEmpty( PhylogenyMethods.getSpecies( e ) ) ) {
+                    return false;
+                }
+            }
+            final PhylogenyNode n11 = new PhylogenyNode( "n111111_ECOLI/jdj:0.4",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n11.getName().equals( "n111111_ECOLI/jdj" ) ) {
+                return false;
+            }
+            if ( n11.getDistanceToParent() != 0.4 ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n11 ).equals( "ECOLI" ) ) {
+                return false;
+            }
+            final PhylogenyNode n12 = new PhylogenyNode( "n111111-ECOLI---/jdj:0.4",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n12.getName().equals( "n111111-ECOLI---/jdj" ) ) {
+                return false;
+            }
+            if ( n12.getDistanceToParent() != 0.4 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getSpecies( n12 ).length() > 0 ) {
+                return false;
+            }
+            final Property tvu1 = n5.getNodeData().getProperties().getProperty( "tag1" );
+            final Property tvu3 = n5.getNodeData().getProperties().getProperty( "tag3" );
+            if ( !tvu1.getRef().equals( "tag1" ) ) {
+                return false;
+            }
+            if ( !tvu1.getDataType().equals( "xsd:string" ) ) {
+                return false;
+            }
+            if ( !tvu1.getUnit().equals( "unit1" ) ) {
+                return false;
+            }
+            if ( !tvu1.getValue().equals( "value1" ) ) {
+                return false;
+            }
+            if ( !tvu3.getRef().equals( "tag3" ) ) {
+                return false;
+            }
+            if ( !tvu3.getDataType().equals( "xsd:string" ) ) {
+                return false;
+            }
+            if ( !tvu3.getUnit().equals( "unit3" ) ) {
+                return false;
+            }
+            if ( !tvu3.getValue().equals( "value3" ) ) {
+                return false;
+            }
+            if ( n1.getName().compareTo( "" ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( n1 ) != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
+                return false;
+            }
+            if ( n1.getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+                return false;
+            }
+            if ( n2.getName().compareTo( "" ) != 0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( n2 ) != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
+                return false;
+            }
+            if ( n2.getDistanceToParent() != PhylogenyNode.DISTANCE_DEFAULT ) {
+                return false;
+            }
+            final PhylogenyNode n00 = new PhylogenyNode( "n7:0.000001[&&NHX:GN=gene_name:AC=accession123:ID=node_identifier:S=Ecoli:D=N:Co=N:B=100:T=1:On=100:SOn=100:SNn=100:W=2:C=0.0.0:XN=U=url_tag=www.yahoo.com]" );
+            if ( !n00.getNodeData().getNodeIdentifier().getValue().equals( "node_identifier" ) ) {
+                return false;
+            }
+            if ( !n00.getNodeData().getSequence().getName().equals( "gene_name" ) ) {
+                return false;
+            }
+            if ( !n00.getNodeData().getSequence().getAccession().getValue().equals( "accession123" ) ) {
+                return false;
+            }
+            if ( !n00.getNodeData().getProperties().getProperty( "url_tag" ).getRef().equals( "url_tag" ) ) {
+                return false;
+            }
+            if ( n00.getNodeData().getProperties().getProperty( "url_tag" ).getAppliesTo() != Property.AppliesTo.NODE ) {
+                return false;
+            }
+            if ( !n00.getNodeData().getProperties().getProperty( "url_tag" ).getDataType().equals( "xsd:anyURI" ) ) {
+                return false;
+            }
+            if ( !n00.getNodeData().getProperties().getProperty( "url_tag" ).getValue().equals( "www.yahoo.com" ) ) {
+                return false;
+            }
+            if ( !n00.getNodeData().getProperties().getProperty( "url_tag" ).getUnit().equals( "" ) ) {
+                return false;
+            }
+            final PhylogenyNode nx = new PhylogenyNode( "n5:0.1[&&NHX:S=Ecoli:GN=gene_1]" );
+            if ( !nx.getNodeData().getSequence().getName().equals( "gene_1" ) ) {
+                return false;
+            }
+            final PhylogenyNode nx2 = new PhylogenyNode( "n5:0.1[&&NHX:S=Ecoli:G=gene_2]" );
+            if ( !nx2.getNodeData().getSequence().getName().equals( "gene_2" ) ) {
+                return false;
+            }
+            final PhylogenyNode n13 = new PhylogenyNode( "blah_12345/1-2",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n13.getName().equals( "blah_12345/1-2" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n13 ).equals( "" ) ) {
+                return false;
+            }
+            final PhylogenyNode n14 = new PhylogenyNode( "blah_12X45/1-2",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n14.getName().equals( "blah_12X45/1-2" ) ) {
+                return false;
+            }
+            if ( !PhylogenyMethods.getSpecies( n14 ).equals( "12X45" ) ) {
+                return false;
+            }
+            final PhylogenyNode n15 = new PhylogenyNode( "something_wicked[123]",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n15.getName().equals( "something_wicked" ) ) {
+                return false;
+            }
+            if ( n15.getBranchData().getNumberOfConfidences() != 1 ) {
+                return false;
+            }
+            if ( !isEqual( n15.getBranchData().getConfidence( 0 ).getValue(), 123 ) ) {
+                return false;
+            }
+            final PhylogenyNode n16 = new PhylogenyNode( "something_wicked2[9]",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n16.getName().equals( "something_wicked2" ) ) {
+                return false;
+            }
+            if ( n16.getBranchData().getNumberOfConfidences() != 1 ) {
+                return false;
+            }
+            if ( !isEqual( n16.getBranchData().getConfidence( 0 ).getValue(), 9 ) ) {
+                return false;
+            }
+            final PhylogenyNode n17 = new PhylogenyNode( "something_wicked3[a]",
+                                                         ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !n17.getName().equals( "something_wicked3" ) ) {
+                return false;
+            }
+            if ( n17.getBranchData().getNumberOfConfidences() != 0 ) {
+                return false;
+            }
+            final PhylogenyNode n18 = new PhylogenyNode( ":0.5[91]", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+            if ( !isEqual( n18.getDistanceToParent(), 0.5 ) ) {
+                return false;
+            }
+            if ( n18.getBranchData().getNumberOfConfidences() != 1 ) {
+                return false;
+            }
+            if ( !isEqual( n18.getBranchData().getConfidence( 0 ).getValue(), 91 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
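+    // Phylogeny-level NHX parsing, including whitespace, bracketed comments, and
+    // malformed NHX-like annotations that should be ignored.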
+    private static boolean testNHXParsing() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p1 = factory.create( "(A     [&&NHX:S=a_species],B1[&&NHX:S=b_species])", new NHXParser() )[ 0 ];
+            if ( !p1.toNewHampshireX().equals( "(A[&&NHX:S=a_species],B1[&&NHX:S=b_species])" ) ) {
+                return false;
+            }
+            final String p2_S = "(((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq]";
+            final Phylogeny[] p2 = factory.create( p2_S, new NHXParser() );
+            if ( !p2[ 0 ].toNewHampshireX().equals( p2_S ) ) {
+                return false;
+            }
+            final String p2b_S = "(((((((A:0.2[&NHX:S=qwerty]):0.2[&:S=uiop]):0.3[&NHX:S=asdf]):0.4[S=zxc]):0.5[]):0.6[&&NH:S=asd]):0.7[&&HX:S=za]):0.8[&&:S=zaq]";
+            final Phylogeny[] p2b = factory.create( p2b_S, new NHXParser() );
+            if ( !p2b[ 0 ].toNewHampshireX().equals( "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8" ) ) {
+                return false;
+            }
+            final Phylogeny[] p3 = factory
+                    .create( "[  comment&&NHX,())))](((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq]",
+                             new NHXParser() );
+            if ( !p3[ 0 ].toNewHampshireX().equals( p2_S ) ) {
+                return false;
+            }
+            final Phylogeny[] p4 = factory
+                    .create( "(((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(]",
+                             new NHXParser() );
+            if ( !p4[ 0 ].toNewHampshireX().equals( p2_S ) ) {
+                return false;
+            }
+            final Phylogeny[] p5 = factory
+                    .create( "[]  (  [][ ][   ]  ([((( &&NHXcomment only![[[[[[]([]((((A:0.2[&&NHX:S=q[comment )))]werty][,,,,))]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=a[comment,,))]sdf])[comment(((]:0.4[&&NHX:S=zxc][comment(((][comment(((]):0.5[&&NHX:S=a]):0.6[&&NHX:S=a[comment(((]sd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(((]",
+                             new NHXParser() );
+            if ( !p5[ 0 ].toNewHampshireX().equals( p2_S ) ) {
+                return false;
+            }
+            final String p6_S_C = "(A[][][][1][22][333][4444][55555][666666][&&NHX:S=Aspecies],B[))],C,(AA,BB,CC,(CCC,DDD,EEE,[comment](FFFF,GGGG)x)y,D[comment]D,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)";
+            final String p6_S_WO_C = "(A[&&NHX:S=Aspecies],B,C,(AA,BB,CC,(CCC,DDD,EEE,(FFFF,GGGG)x)y,DD,EE,FF,GG,HH),D,E,(EE,FF),F,G,H,(((((5)4)3)2)1),I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,(XX,(YY)),Y,Z)";
+            final Phylogeny[] p6 = factory.create( p6_S_C, new NHXParser() );
+            if ( !p6[ 0 ].toNewHampshireX().equals( p6_S_WO_C ) ) {
+                return false;
+            }
+            final String p7_S_C = "(((A [&&NHX:S=species_a], B [&&NHX:S=Vstorri] , C   , D),(A,B,C,D[comment])[],[c][]([xxx]A[comment],[comment]B[comment][comment],[comment][comment]C[comment][comment],[comment][comment]D[comment][comment])[comment][comment],[comment]   [comment](A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C[comment][comment][comment][comment][comment]    [comment],D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),[comment][comment]((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))";
+            final String p7_S_WO_C = "(((A[&&NHX:S=species_a],B[&&NHX:S=Vstorri],C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)),((A,B,C,D),(A,B,C,D),(A,B,C,D),(A,B,C,D)))";
+            final Phylogeny[] p7 = factory.create( p7_S_C, new NHXParser() );
+            if ( !p7[ 0 ].toNewHampshireX().equals( p7_S_WO_C ) ) {
+                return false;
+            }
+            final String p8_S_C = "[cmt](((([]([))))))](((((A[&&NHX:S= [a comment] a])))))))[too many comments!:)])),(((((((((B[&&NHX[ a comment in a bad place]:S   =b])))))[] []   )))),(((((((((C[&&NHX:S=c])   ))[,,, ])))))))";
+            final String p8_S_WO_C = "((((((((((A[&&NHX:S=a]))))))))),(((((((((B[&&NHX:S=b]))))))))),(((((((((C[&&NHX:S=c]))))))))))";
+            final Phylogeny[] p8 = factory.create( p8_S_C, new NHXParser() );
+            if ( !p8[ 0 ].toNewHampshireX().equals( p8_S_WO_C ) ) {
+                return false;
+            }
+            final Phylogeny p9 = factory.create( "((A:0.2,B:0.3):0.5[91],C:0.1)root:0.1[100]", new NHXParser() )[ 0 ];
+            if ( !p9.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91.0],C:0.1)root:0.1[&&NHX:B=100.0]" ) ) {
+                return false;
+            }
+            final Phylogeny p10 = factory
+                    .create( " [79]   ( (A [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]",
+                             new NHXParser() )[ 0 ];
+            if ( !p10.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91.0],C:0.1)root:0.1[&&NHX:B=100.0]" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
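+    // Parsing of single- and double-quoted node names, with and without
+    // setIgnoreQuotes(), including round-tripping through NH/NHX output.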
+    private static boolean testNHXParsingQuotes() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final NHXParser p = new NHXParser();
+            final Phylogeny[] phylogenies_0 = factory.create( new File( Test.PATH_TO_TEST_DATA + "quotes.nhx" ), p );
+            if ( phylogenies_0.length != 5 ) {
+                return false;
+            }
+            final Phylogeny phy = phylogenies_0[ 4 ];
+            if ( phy.getNumberOfExternalNodes() != 7 ) {
+                return false;
+            }
+            if ( phy.getNodes( "a name in double quotes from tree ((a,b),c)" ).size() != 1 ) {
+                return false;
+            }
+            if ( phy.getNodes( "charles darwin 'origin of species'" ).size() != 1 ) {
+                return false;
+            }
+            if ( !phy.getNodes( "charles darwin 'origin of species'" ).get( 0 ).getNodeData().getTaxonomy()
+                    .getScientificName().equals( "hsapiens" ) ) {
+                return false;
+            }
+            if ( phy.getNodes( "shouldbetogether single quotes" ).size() != 1 ) {
+                return false;
+            }
+            if ( phy.getNodes( "'single quotes' inside double quotes" ).size() != 1 ) {
+                return false;
+            }
+            if ( phy.getNodes( "double quotes inside single quotes" ).size() != 1 ) {
+                return false;
+            }
+            if ( phy.getNodes( "noquotes" ).size() != 1 ) {
+                return false;
+            }
+            if ( phy.getNodes( "A   (  B    C '" ).size() != 1 ) {
+                return false;
+            }
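+            // In-memory quote handling with and without setIgnoreQuotes():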
+            final NHXParser p1p = new NHXParser();
+            p1p.setIgnoreQuotes( true );
+            final Phylogeny p1 = factory.create( "(\"A\",'B1')", p1p )[ 0 ];
+            if ( !p1.toNewHampshire().equals( "(A,B1);" ) ) {
+                return false;
+            }
+            final NHXParser p2p = new NHXParser();
+            p2p.setIgnoreQuotes( false );
+            final Phylogeny p2 = factory.create( "(\"A\",'B1')", p2p )[ 0 ];
+            if ( !p2.toNewHampshire().equals( "(A,B1);" ) ) {
+                return false;
+            }
+            final NHXParser p3p = new NHXParser();
+            p3p.setIgnoreQuotes( false );
+            final Phylogeny p3 = factory.create( "(\"A)\",'B1')", p3p )[ 0 ];
+            if ( !p3.toNewHampshire().equals( "('A)',B1);" ) ) {
+                return false;
+            }
+            final NHXParser p4p = new NHXParser();
+            p4p.setIgnoreQuotes( false );
+            final Phylogeny p4 = factory.create( "(\"A)\",'B(),; x')", p4p )[ 0 ];
+            if ( !p4.toNewHampshire().equals( "('A)','B(),; x');" ) ) {
+                return false;
+            }
+            final Phylogeny p10 = factory
+                    .create( " [79]   ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool,  was! ) ':0.1[100] [comment]",
+                             new NHXParser() )[ 0 ];
+            final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91.0],'C (or D?\\//;,))':0.1)'root is here (cool,  was! )':0.1[&&NHX:B=100.0]";
+            if ( !p10.toNewHampshireX().equals( p10_clean_str ) ) {
+                return false;
+            }
+            final Phylogeny p11 = factory.create( p10.toNewHampshireX(), new NHXParser() )[ 0 ];
+            if ( !p11.toNewHampshireX().equals( p10_clean_str ) ) {
+                return false;
+            }
+            // Quoted names containing whitespace and control characters:
+            final Phylogeny p12 = factory
+                    .create( " [79]   ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]",
+                             new NHXParser() )[ 0 ];
+            final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91.0],'C (or D?\\//;,))':0.1)'root is here (cool,  was! )':0.1[&&NHX:B=100.0]";
+            if ( !p12.toNewHampshireX().equals( p12_clean_str ) ) {
+                return false;
+            }
+            final Phylogeny p13 = factory.create( p12.toNewHampshireX(), new NHXParser() )[ 0 ];
+            if ( !p13.toNewHampshireX().equals( p12_clean_str ) ) {
+                return false;
+            }
+            final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool,  was! )':0.1;";
+            if ( !p13.toNewHampshire().equals( p12_clean_str_nh ) ) {
+                return false;
+            }
+            final Phylogeny p14 = factory.create( p13.toNewHampshire(), new NHXParser() )[ 0 ];
+            if ( !p14.toNewHampshire().equals( p12_clean_str_nh ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
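+    // Equality semantics of PhylogenyBranch for directed and undirected branches.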
+    private static boolean testPhylogenyBranch() {
+        try {
+            final PhylogenyNode a1 = new PhylogenyNode( "a" );
+            final PhylogenyNode b1 = new PhylogenyNode( "b" );
+            final PhylogenyBranch a1b1 = new PhylogenyBranch( a1, b1 );
+            final PhylogenyBranch b1a1 = new PhylogenyBranch( b1, a1 );
+            if ( !a1b1.equals( a1b1 ) ) {
+                return false;
+            }
+            if ( !a1b1.equals( b1a1 ) ) {
+                return false;
+            }
+            if ( !b1a1.equals( a1b1 ) ) {
+                return false;
+            }
+            final PhylogenyBranch a1_b1 = new PhylogenyBranch( a1, b1, true );
+            final PhylogenyBranch b1_a1 = new PhylogenyBranch( b1, a1, true );
+            final PhylogenyBranch a1_b1_ = new PhylogenyBranch( a1, b1, false );
+            if ( a1_b1.equals( b1_a1 ) ) {
+                return false;
+            }
+            if ( a1_b1.equals( a1_b1_ ) ) {
+                return false;
+            }
+            final PhylogenyBranch b1_a1_ = new PhylogenyBranch( b1, a1, false );
+            if ( !a1_b1.equals( b1_a1_ ) ) {
+                return false;
+            }
+            if ( a1_b1_.equals( b1_a1_ ) ) {
+                return false;
+            }
+            if ( !a1_b1_.equals( b1_a1 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
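+    // Parses phyloxml_distribution.xml and verifies distribution elements
+    // (descriptions, points, polygons, coordinates), then round-trips via toPhyloXML().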
+    private static boolean testPhyloXMLparsingOfDistributionElement() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            PhyloXmlParser xml_parser = null;
+            try {
+                xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
+            }
+            catch ( final Exception e ) {
+                // Do nothing -- means we're not running from a jar.
+            }
+            if ( xml_parser == null ) {
+                xml_parser = new PhyloXmlParser();
+                if ( USE_LOCAL_PHYLOXML_SCHEMA ) {
+                    xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD );
+                }
+                else {
+                    xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD );
+                }
+            }
+            final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml",
+                                                              xml_parser );
+            if ( xml_parser.getErrorCount() > 0 ) {
+                System.out.println( xml_parser.getErrorMessages().toString() );
+                return false;
+            }
+            if ( phylogenies_0.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t1 = phylogenies_0[ 0 ];
+            PhylogenyNode n = null;
+            Distribution d = null;
+            n = t1.getNode( "root node" );
+            if ( !n.getNodeData().isHasDistribution() ) {
+                return false;
+            }
+            if ( n.getNodeData().getDistributions().size() != 1 ) {
+                return false;
+            }
+            d = n.getNodeData().getDistribution();
+            if ( !d.getDesc().equals( "Hirschweg 38" ) ) {
+                return false;
+            }
+            if ( d.getPoints().size() != 1 ) {
+                return false;
+            }
+            if ( d.getPolygons() != null ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "472" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "47.48148427110029" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "8.768951296806335" ) ) {
+                return false;
+            }
+            n = t1.getNode( "node a" );
+            if ( !n.getNodeData().isHasDistribution() ) {
+                return false;
+            }
+            if ( n.getNodeData().getDistributions().size() != 2 ) {
+                return false;
+            }
+            d = n.getNodeData().getDistribution( 1 );
+            if ( !d.getDesc().equals( "San Diego" ) ) {
+                return false;
+            }
+            if ( d.getPoints().size() != 1 ) {
+                return false;
+            }
+            if ( d.getPolygons() != null ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "104" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "32.880933" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "-117.217543" ) ) {
+                return false;
+            }
+            n = t1.getNode( "node bb" );
+            if ( !n.getNodeData().isHasDistribution() ) {
+                return false;
+            }
+            if ( n.getNodeData().getDistributions().size() != 1 ) {
+                return false;
+            }
+            d = n.getNodeData().getDistribution( 0 );
+            if ( d.getPoints().size() != 3 ) {
+                return false;
+            }
+            if ( d.getPolygons().size() != 2 ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "1" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "2" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 1 ).getLatitude().toString().equals( "3" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 1 ).getLongitude().toString().equals( "4" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 2 ).getLatitude().toString().equals( "5" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 2 ).getLongitude().toString().equals( "6" ) ) {
+                return false;
+            }
+            Polygon p = d.getPolygons().get( 0 );
+            if ( p.getPoints().size() != 3 ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "0.1" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "0.2" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 2 ).getLatitude().toString().equals( "0.5" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 2 ).getLongitude().toString().equals( "0.6" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 2 ).getAltitude().toString().equals( "30" ) ) {
+                return false;
+            }
+            p = d.getPolygons().get( 1 );
+            if ( p.getPoints().size() != 3 ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "1.49348902489947473" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "2.567489393947847492" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) {
+                return false;
+            }
+            // Roundtrip:
+            final StringBuffer t1_sb = new StringBuffer( t1.toPhyloXML( 0 ) );
+            final Phylogeny[] rt = factory.create( t1_sb, xml_parser );
+            if ( rt.length != 1 ) {
+                return false;
+            }
+            final Phylogeny t1_rt = rt[ 0 ];
+            n = t1_rt.getNode( "root node" );
+            if ( !n.getNodeData().isHasDistribution() ) {
+                return false;
+            }
+            if ( n.getNodeData().getDistributions().size() != 1 ) {
+                return false;
+            }
+            d = n.getNodeData().getDistribution();
+            if ( !d.getDesc().equals( "Hirschweg 38" ) ) {
+                return false;
+            }
+            if ( d.getPoints().size() != 1 ) {
+                return false;
+            }
+            if ( d.getPolygons() != null ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "472" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "47.48148427110029" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "8.768951296806335" ) ) {
+                return false;
+            }
+            n = t1_rt.getNode( "node a" );
+            if ( !n.getNodeData().isHasDistribution() ) {
+                return false;
+            }
+            if ( n.getNodeData().getDistributions().size() != 2 ) {
+                return false;
+            }
+            d = n.getNodeData().getDistribution( 1 );
+            if ( !d.getDesc().equals( "San Diego" ) ) {
+                return false;
+            }
+            if ( d.getPoints().size() != 1 ) {
+                return false;
+            }
+            if ( d.getPolygons() != null ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltitude().toString().equals( "104" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getAltiudeUnit().equals( "m" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getGeodeticDatum().equals( "WGS84" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "32.880933" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "-117.217543" ) ) {
+                return false;
+            }
+            n = t1_rt.getNode( "node bb" );
+            if ( !n.getNodeData().isHasDistribution() ) {
+                return false;
+            }
+            if ( n.getNodeData().getDistributions().size() != 1 ) {
+                return false;
+            }
+            d = n.getNodeData().getDistribution( 0 );
+            if ( d.getPoints().size() != 3 ) {
+                return false;
+            }
+            if ( d.getPolygons().size() != 2 ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLatitude().toString().equals( "1" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 0 ).getLongitude().toString().equals( "2" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 1 ).getLatitude().toString().equals( "3" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 1 ).getLongitude().toString().equals( "4" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 2 ).getLatitude().toString().equals( "5" ) ) {
+                return false;
+            }
+            if ( !d.getPoints().get( 2 ).getLongitude().toString().equals( "6" ) ) {
+                return false;
+            }
+            p = d.getPolygons().get( 0 );
+            if ( p.getPoints().size() != 3 ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "0.1" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "0.2" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 2 ).getLatitude().toString().equals( "0.5" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 2 ).getLongitude().toString().equals( "0.6" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 2 ).getAltitude().toString().equals( "30" ) ) {
+                return false;
+            }
+            p = d.getPolygons().get( 1 );
+            if ( p.getPoints().size() != 3 ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLatitude().toString().equals( "1.49348902489947473" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getLongitude().toString().equals( "2.567489393947847492" ) ) {
+                return false;
+            }
+            if ( !p.getPoints().get( 0 ).getAltitude().toString().equals( "10" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
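+    // Checks that iteratorPostorder() visits the nodes of an NHX tree in
+    // post-order (children before their parent, root last) and that reset()
+    // allows the iterator to be reused.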
+    private static boolean testPostOrderIterator() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0 = factory.create( "((A,B)ab,(C,D)cd)r", new NHXParser() )[ 0 ];
+            PhylogenyNodeIterator it0;
+            for( it0 = t0.iteratorPostorder(); it0.hasNext(); ) {
+                it0.next();
+            }
+            for( it0.reset(); it0.hasNext(); ) {
+                it0.next();
+            }
+            final Phylogeny t1 = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r", new NHXParser() )[ 0 ];
+            final PhylogenyNodeIterator it = t1.iteratorPostorder();
+            if ( !it.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "ab" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "D" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "cd" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "abcd" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "E" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "F" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "ef" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "G" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "H" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "gh" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "efgh" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "r" ) ) {
+                return false;
+            }
+            if ( it.hasNext() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
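+    // Checks that iteratorPreorder() visits the nodes of an NHX tree in
+    // pre-order (each parent before its children, root first) and that
+    // reset() allows the iterator to be reused.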
+    private static boolean testPreOrderIterator() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0 = factory.create( "((A,B)ab,(C,D)cd)r", new NHXParser() )[ 0 ];
+            PhylogenyNodeIterator it0;
+            for( it0 = t0.iteratorPreorder(); it0.hasNext(); ) {
+                it0.next();
+            }
+            for( it0.reset(); it0.hasNext(); ) {
+                it0.next();
+            }
+            PhylogenyNodeIterator it = t0.iteratorPreorder();
+            if ( !it.next().getName().equals( "r" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "ab" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "cd" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "D" ) ) {
+                return false;
+            }
+            if ( it.hasNext() ) {
+                return false;
+            }
+            final Phylogeny t1 = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r", new NHXParser() )[ 0 ];
+            it = t1.iteratorPreorder();
+            if ( !it.next().getName().equals( "r" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "abcd" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "ab" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "cd" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "D" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "efgh" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "ef" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "E" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "F" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "gh" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "G" ) ) {
+                return false;
+            }
+            if ( !it.next().getName().equals( "H" ) ) {
+                return false;
+            }
+            if ( it.hasNext() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
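+    // Checks PropertiesMap: adding properties, looking them up by reference,
+    // filtering by reference prefix, and removing a property.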
+    private static boolean testPropertiesMap() {
+        try {
+            final PropertiesMap pm = new PropertiesMap();
+            final Property p0 = new Property( "dimensions:diameter", "1", "metric:mm", "xsd:decimal", AppliesTo.NODE );
+            final Property p1 = new Property( "dimensions:length", "2", "metric:mm", "xsd:decimal", AppliesTo.NODE );
+            final Property p2 = new Property( "something:else",
+                                              "?",
+                                              "improbable:research",
+                                              "xsd:decimal",
+                                              AppliesTo.NODE );
+            pm.addProperty( p0 );
+            pm.addProperty( p1 );
+            pm.addProperty( p2 );
+            if ( !pm.getProperty( "dimensions:diameter" ).getValue().equals( "1" ) ) {
+                return false;
+            }
+            if ( !pm.getProperty( "dimensions:length" ).getValue().equals( "2" ) ) {
+                return false;
+            }
+            if ( pm.getProperties().size() != 3 ) {
+                return false;
+            }
+            if ( pm.getPropertiesWithGivenReferencePrefix( "dimensions" ).size() != 2 ) {
+                return false;
+            }
+            if ( pm.getPropertiesWithGivenReferencePrefix( "something" ).size() != 1 ) {
+                return false;
+            }
+            if ( pm.getProperties().size() != 3 ) {
+                return false;
+            }
+            pm.removeProperty( "dimensions:diameter" );
+            if ( pm.getProperties().size() != 2 ) {
+                return false;
+            }
+            if ( pm.getPropertiesWithGivenReferencePrefix( "dimensions" ).size() != 1 ) {
+                return false;
+            }
+            if ( pm.getPropertiesWithGivenReferencePrefix( "something" ).size() != 1 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
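+    // Checks that levelOrderReID() assigns new node ids level by level,
+    // starting from the current PhylogenyNode count (root = count, its
+    // children = count + 1, and so on).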
+    private static boolean testReIdMethods() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p = factory.create( "((1,2)A,(((X,Y,Z)a,b)3)B,(4,5,6)C)r", new NHXParser() )[ 0 ];
+            final int count = PhylogenyNode.getNodeCount();
+            p.levelOrderReID();
+            if ( p.getNode( "r" ).getId() != count ) {
+                return false;
+            }
+            if ( p.getNode( "A" ).getId() != count + 1 ) {
+                return false;
+            }
+            if ( p.getNode( "B" ).getId() != count + 1 ) {
+                return false;
+            }
+            if ( p.getNode( "C" ).getId() != count + 1 ) {
+                return false;
+            }
+            if ( p.getNode( "1" ).getId() != count + 2 ) {
+                return false;
+            }
+            if ( p.getNode( "2" ).getId() != count + 2 ) {
+                return false;
+            }
+            if ( p.getNode( "3" ).getId() != count + 2 ) {
+                return false;
+            }
+            if ( p.getNode( "4" ).getId() != count + 2 ) {
+                return false;
+            }
+            if ( p.getNode( "5" ).getId() != count + 2 ) {
+                return false;
+            }
+            if ( p.getNode( "6" ).getId() != count + 2 ) {
+                return false;
+            }
+            if ( p.getNode( "a" ).getId() != count + 3 ) {
+                return false;
+            }
+            if ( p.getNode( "b" ).getId() != count + 3 ) {
+                return false;
+            }
+            if ( p.getNode( "X" ).getId() != count + 4 ) {
+                return false;
+            }
+            if ( p.getNode( "Y" ).getId() != count + 4 ) {
+                return false;
+            }
+            if ( p.getNode( "Z" ).getId() != count + 4 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
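+    // Checks that repeated reRoot() calls preserve branch lengths (with the
+    // branch carrying the new root split in two) and keep confidence values
+    // attached to the correct branches.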
+    private static boolean testRerooting() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t1 = factory.create( "((A:1,B:2)AB:1[&&NHX:B=55],(C:3,D:5)CD:3[&&NHX:B=10])ABCD:0.5",
+                                                 new NHXParser() )[ 0 ];
+            if ( !t1.isRooted() ) {
+                return false;
+            }
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "CD" ) );
+            t1.reRoot( t1.getNode( "A" ) );
+            t1.reRoot( t1.getNode( "B" ) );
+            t1.reRoot( t1.getNode( "AB" ) );
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "C" ) );
+            t1.reRoot( t1.getNode( "CD" ) );
+            t1.reRoot( t1.getNode( "A" ) );
+            t1.reRoot( t1.getNode( "B" ) );
+            t1.reRoot( t1.getNode( "AB" ) );
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "C" ) );
+            t1.reRoot( t1.getNode( "A" ) );
+            t1.reRoot( t1.getNode( "B" ) );
+            t1.reRoot( t1.getNode( "AB" ) );
+            t1.reRoot( t1.getNode( "C" ) );
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "CD" ) );
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "A" ) );
+            t1.reRoot( t1.getNode( "B" ) );
+            t1.reRoot( t1.getNode( "AB" ) );
+            t1.reRoot( t1.getNode( "C" ) );
+            t1.reRoot( t1.getNode( "D" ) );
+            t1.reRoot( t1.getNode( "CD" ) );
+            t1.reRoot( t1.getNode( "D" ) );
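+            // The last re-root is on the branch to D: its length (5) is split
+            // into 2.5 + 2.5, while the former root branches AB (1) and CD (3)
+            // merge into a single branch of length 4.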
+            if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 1 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 2 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 3 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "D" ).getDistanceToParent(), 2.5 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "CD" ).getDistanceToParent(), 2.5 ) ) {
+                return false;
+            }
+            if ( !isEqual( t1.getNode( "AB" ).getDistanceToParent(), 4 ) ) {
+                return false;
+            }
+            final Phylogeny t2 = factory.create( "(((A:1,B:2)AB:10[&&NHX:B=55],C)ABC:3[&&NHX:B=33],D:5)ABCD:0.5",
+                                                 new NHXParser() )[ 0 ];
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "ABC" ) );
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "C" ) );
+            t2.reRoot( t2.getNode( "ABC" ) );
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "C" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "ABC" ) );
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "C" ) );
+            t2.reRoot( t2.getNode( "ABC" ) );
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "C" ) );
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "C" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "ABC" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "A" ) );
+            t2.reRoot( t2.getNode( "B" ) );
+            t2.reRoot( t2.getNode( "AB" ) );
+            t2.reRoot( t2.getNode( "C" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            t2.reRoot( t2.getNode( "ABC" ) );
+            t2.reRoot( t2.getNode( "D" ) );
+            if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) {
+                return false;
+            }
+            t2.reRoot( t2.getNode( "ABC" ) );
+            if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) {
+                return false;
+            }
+            t2.reRoot( t2.getNode( "AB" ) );
+            if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "D" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) {
+                return false;
+            }
+            t2.reRoot( t2.getNode( "AB" ) );
+            if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "D" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) {
+                return false;
+            }
+            t2.reRoot( t2.getNode( "D" ) );
+            if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) {
+                return false;
+            }
+            t2.reRoot( t2.getNode( "ABC" ) );
+            if ( !isEqual( t2.getNode( "AB" ).getBranchData().getConfidence( 0 ).getValue(), 55 ) ) {
+                return false;
+            }
+            if ( !isEqual( t2.getNode( "ABC" ).getBranchData().getConfidence( 0 ).getValue(), 33 ) ) {
+                return false;
+            }
+            final Phylogeny t3 = factory.create( "(A[&&NHX:B=10],B[&&NHX:B=20],C[&&NHX:B=30],D[&&NHX:B=40])",
+                                                 new NHXParser() )[ 0 ];
+            t3.reRoot( t3.getNode( "B" ) );
+            if ( t3.getNode( "B" ).getBranchData().getConfidence( 0 ).getValue() != 20 ) {
+                return false;
+            }
+            if ( t3.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() != 20 ) {
+                return false;
+            }
+            if ( t3.getNode( "A" ).getParent().getNumberOfDescendants() != 3 ) {
+                return false;
+            }
+            t3.reRoot( t3.getNode( "B" ) );
+            if ( t3.getNode( "B" ).getBranchData().getConfidence( 0 ).getValue() != 20 ) {
+                return false;
+            }
+            if ( t3.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() != 20 ) {
+                return false;
+            }
+            if ( t3.getNode( "A" ).getParent().getNumberOfDescendants() != 3 ) {
+                return false;
+            }
+            t3.reRoot( t3.getRoot() );
+            if ( t3.getNode( "B" ).getBranchData().getConfidence( 0 ).getValue() != 20 ) {
+                return false;
+            }
+            if ( t3.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() != 20 ) {
+                return false;
+            }
+            if ( t3.getNode( "A" ).getParent().getNumberOfDescendants() != 3 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
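+    // Exercises SDIse-based reconciliation of gene trees with species trees:
+    // duplication sums, per-node speciation/duplication assignments, and the
+    // mapping cost L on a series of small examples.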
+    private static boolean testSDIse() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny species1 = factory.create( "[&&NHX:S=yeast]", new NHXParser() )[ 0 ];
+            final Phylogeny gene1 = factory.create( "(A1[&&NHX:S=yeast],A2[&&NHX:S=yeast])", new NHXParser() )[ 0 ];
+            gene1.setRooted( true );
+            species1.setRooted( true );
+            final SDI sdi = new SDIse( gene1, species1 );
+            if ( !gene1.getRoot().isDuplication() ) {
+                return false;
+            }
+            final Phylogeny species2 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene2 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=B])ab,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r",
+                             new NHXParser() )[ 0 ];
+            species2.setRooted( true );
+            gene2.setRooted( true );
+            final SDI sdi2 = new SDIse( gene2, species2 );
+            if ( sdi2.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( !gene2.getNode( "ab" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene2.getNode( "ab" ).isHasAssignedEvent() ) {
+                return false;
+            }
+            if ( !gene2.getNode( "abc" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene2.getNode( "abc" ).isHasAssignedEvent() ) {
+                return false;
+            }
+            if ( !gene2.getNode( "r" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene2.getNode( "r" ).isHasAssignedEvent() ) {
+                return false;
+            }
+            final Phylogeny species3 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene3 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=A])aa,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r",
+                             new NHXParser() )[ 0 ];
+            species3.setRooted( true );
+            gene3.setRooted( true );
+            final SDI sdi3 = new SDIse( gene3, species3 );
+            if ( sdi3.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( !gene3.getNode( "aa" ).isDuplication() ) {
+                return false;
+            }
+            if ( !gene3.getNode( "aa" ).isHasAssignedEvent() ) {
+                return false;
+            }
+            final Phylogeny species4 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene4 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=C])ac,[&&NHX:S=B])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r",
+                             new NHXParser() )[ 0 ];
+            species4.setRooted( true );
+            gene4.setRooted( true );
+            final SDI sdi4 = new SDIse( gene4, species4 );
+            if ( sdi4.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( !gene4.getNode( "ac" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene4.getNode( "abc" ).isDuplication() ) {
+                return false;
+            }
+            if ( gene4.getNode( "abcd" ).isDuplication() ) {
+                return false;
+            }
+            if ( species4.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            if ( gene4.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            final Phylogeny species5 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene5 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=D])ad,[&&NHX:S=C])adc,[&&NHX:S=B])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r",
+                             new NHXParser() )[ 0 ];
+            species5.setRooted( true );
+            gene5.setRooted( true );
+            final SDI sdi5 = new SDIse( gene5, species5 );
+            if ( sdi5.getDuplicationsSum() != 2 ) {
+                return false;
+            }
+            if ( !gene5.getNode( "ad" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene5.getNode( "adc" ).isDuplication() ) {
+                return false;
+            }
+            if ( !gene5.getNode( "abcd" ).isDuplication() ) {
+                return false;
+            }
+            if ( species5.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            if ( gene5.getNumberOfExternalNodes() != 6 ) {
+                return false;
+            }
+            // Trees from Louxin Zhang 1997 "On a Mirkin-Muchnik-Smith
+            // Conjecture for Comparing Molecular Phylogenies"
+            // J. of Comput Bio. Vol. 4, No 2, pp.177-187
+            final Phylogeny species6 = factory
+                    .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2,"
+                                     + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene6 = factory
+                    .create( "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1,3:0.1[&&NHX:S=3])1-2-3:0.1,"
+                                     + "((4:0.1[&&NHX:S=4],(5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.1)4-5-6:0.1,"
+                                     + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],9:0.1[&&NHX:S=9])8-9:0.1)7-8-9:0.1)4-5-6-7-8-9:0.1)r;",
+                             new NHXParser() )[ 0 ];
+            species6.setRooted( true );
+            gene6.setRooted( true );
+            final SDI sdi6 = new SDIse( gene6, species6 );
+            if ( sdi6.getDuplicationsSum() != 3 ) {
+                return false;
+            }
+            if ( !gene6.getNode( "r" ).isDuplication() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "4-5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "1-2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "1-2-3" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "5-6" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "8-9" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !gene6.getNode( "4-5-6-7-8-9" ).isSpeciation() ) {
+                return false;
+            }
+            if ( sdi6.computeMappingCostL() != 17 ) {
+                return false;
+            }
+            if ( species6.getNumberOfExternalNodes() != 9 ) {
+                return false;
+            }
+            if ( gene6.getNumberOfExternalNodes() != 9 ) {
+                return false;
+            }
+            final Phylogeny species7 = Test.createPhylogeny( "(((((((" + "([&&NHX:S=a1],[&&NHX:S=a2]),"
+                    + "([&&NHX:S=b1],[&&NHX:S=b2])" + "),[&&NHX:S=x]),(" + "([&&NHX:S=m1],[&&NHX:S=m2]),"
+                    + "([&&NHX:S=n1],[&&NHX:S=n2])" + ")),(" + "([&&NHX:S=i1],[&&NHX:S=i2]),"
+                    + "([&&NHX:S=j1],[&&NHX:S=j2])" + ")),(" + "([&&NHX:S=e1],[&&NHX:S=e2]),"
+                    + "([&&NHX:S=f1],[&&NHX:S=f2])" + ")),[&&NHX:S=y]),[&&NHX:S=z])" );
+            species7.setRooted( true );
+            final Phylogeny gene7_1 = Test
+                    .createPhylogeny( "((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
+            gene7_1.setRooted( true );
+            final SDI sdi7 = new SDIse( gene7_1, species7 );
+            if ( sdi7.getDuplicationsSum() != 0 ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "a2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "x" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "m1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "i1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "e1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "y" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_1, "a1", "z" ).isSpeciation() ) {
+                return false;
+            }
+            final Phylogeny gene7_2 = Test
+                    .createPhylogeny( "(((((((((a1[&&NHX:S=a1],a2[&&NHX:S=a2]),b1[&&NHX:S=b1]),x[&&NHX:S=x]),m1[&&NHX:S=m1]),i1[&&NHX:S=i1]),j2[&&NHX:S=j2]),e1[&&NHX:S=e1]),y[&&NHX:S=y]),z[&&NHX:S=z])" );
+            gene7_2.setRooted( true );
+            final SDI sdi7_2 = new SDIse( gene7_2, species7 );
+            if ( sdi7_2.getDuplicationsSum() != 1 ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "a2" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "b1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "x" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "m1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "i1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "j2" ).isDuplication() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "e1" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "y" ).isSpeciation() ) {
+                return false;
+            }
+            if ( !Test.getEvent( gene7_2, "a1", "z" ).isSpeciation() ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
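+    // Checks SDIR: pre-order enumeration of branches via getBranchesInPreorder()
+    // and rooting of unrooted gene trees by minimizing duplications, sub-tree
+    // height difference, tree height, and (optionally) the mapping cost L.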
+    private static boolean testSDIunrooted() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p0 = factory.create( "((((A,B)ab,(C1,C2)cc)abc,D)abcd,(E,F)ef)abcdef", new NHXParser() )[ 0 ];
+            final List<PhylogenyBranch> l = SDIR.getBranchesInPreorder( p0 );
+            final Iterator<PhylogenyBranch> iter = l.iterator();
+            PhylogenyBranch br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abcd" ) && !br.getFirstNode().getName().equals( "ef" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abcd" ) && !br.getSecondNode().getName().equals( "ef" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abcd" ) && !br.getFirstNode().getName().equals( "abc" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abcd" ) && !br.getSecondNode().getName().equals( "abc" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abc" ) && !br.getFirstNode().getName().equals( "ab" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abc" ) && !br.getSecondNode().getName().equals( "ab" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "A" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "B" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "abc" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "abc" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abc" ) && !br.getFirstNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abc" ) && !br.getSecondNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "C1" ) && !br.getFirstNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "C1" ) && !br.getSecondNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "C2" ) && !br.getFirstNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "C2" ) && !br.getSecondNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abc" ) && !br.getFirstNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abc" ) && !br.getSecondNode().getName().equals( "cc" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abc" ) && !br.getFirstNode().getName().equals( "abcd" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abc" ) && !br.getSecondNode().getName().equals( "abcd" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "abcd" ) && !br.getFirstNode().getName().equals( "D" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "abcd" ) && !br.getSecondNode().getName().equals( "D" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "ef" ) && !br.getFirstNode().getName().equals( "abcd" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ef" ) && !br.getSecondNode().getName().equals( "abcd" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "ef" ) && !br.getFirstNode().getName().equals( "E" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ef" ) && !br.getSecondNode().getName().equals( "E" ) ) {
+                return false;
+            }
+            br = iter.next();
+            if ( !br.getFirstNode().getName().equals( "ef" ) && !br.getFirstNode().getName().equals( "F" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ef" ) && !br.getSecondNode().getName().equals( "F" ) ) {
+                return false;
+            }
+            if ( iter.hasNext() ) {
+                return false;
+            }
+            final Phylogeny p1 = factory.create( "(C,(A,B)ab)abc", new NHXParser() )[ 0 ];
+            final List<PhylogenyBranch> l1 = SDIR.getBranchesInPreorder( p1 );
+            final Iterator<PhylogenyBranch> iter1 = l1.iterator();
+            br = iter1.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "C" ) ) {
+                return false;
+            }
+            br = iter1.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "A" ) ) {
+                return false;
+            }
+            br = iter1.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( iter1.hasNext() ) {
+                return false;
+            }
+            final Phylogeny p2 = factory.create( "((A,B)ab,C)abc", new NHXParser() )[ 0 ];
+            final List<PhylogenyBranch> l2 = SDIR.getBranchesInPreorder( p2 );
+            final Iterator<PhylogenyBranch> iter2 = l2.iterator();
+            br = iter2.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "C" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "C" ) ) {
+                return false;
+            }
+            br = iter2.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "A" ) ) {
+                return false;
+            }
+            br = iter2.next();
+            if ( !br.getFirstNode().getName().equals( "ab" ) && !br.getFirstNode().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( !br.getSecondNode().getName().equals( "ab" ) && !br.getSecondNode().getName().equals( "B" ) ) {
+                return false;
+            }
+            if ( iter2.hasNext() ) {
+                return false;
+            }
+            final Phylogeny species0 = factory
+                    .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene1 = factory
+                    .create( "(((((A:0.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])",
+                             new NHXParser() )[ 0 ];
+            species0.setRooted( true );
+            gene1.setRooted( true );
+            final SDIR sdi_unrooted = new SDIR();
+            sdi_unrooted.infer( gene1, species0, false, true, true, true, 10 );
+            if ( sdi_unrooted.getCount() != 1 ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalDuplications() != 0 ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.4 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 1.0 ) ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) {
+                return false;
+            }
+            final Phylogeny gene2 = factory
+                    .create( "(((((A:2.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])",
+                             new NHXParser() )[ 0 ];
+            gene2.setRooted( true );
+            sdi_unrooted.infer( gene2, species0, false, false, true, true, 10 );
+            if ( sdi_unrooted.getCount() != 1 ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalDuplications() != 3 ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 2.0 ) ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) {
+                return false;
+            }
+            final Phylogeny species6 = factory
+                    .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2,"
+                                     + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene6 = factory
+                    .create( "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4],"
+                                     + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2],"
+                                     + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],"
+                                     + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])"
+                                     + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])",
+                             new NHXParser() )[ 0 ];
+            species6.setRooted( true );
+            gene6.setRooted( true );
+            Phylogeny[] p6 = sdi_unrooted.infer( gene6, species6, false, true, true, true, 10 );
+            if ( sdi_unrooted.getCount() != 1 ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalDuplications() != 3 ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) {
+                return false;
+            }
+            if ( !p6[ 0 ].getRoot().isDuplication() ) {
+                return false;
+            }
+            if ( !p6[ 0 ].getNode( "4-5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( !p6[ 0 ].getNode( "7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( p6[ 0 ].getNode( "1-2" ).isDuplication() ) {
+                return false;
+            }
+            if ( p6[ 0 ].getNode( "1-2-3" ).isDuplication() ) {
+                return false;
+            }
+            if ( p6[ 0 ].getNode( "5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( p6[ 0 ].getNode( "8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( p6[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            p6 = null;
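+            // Same trees as above, but with the first infer() flag set to true;
+            // here this additionally yields a minimal mapping cost L of 17
+            // (it stays Integer.MAX_VALUE when that flag is false).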
+            final Phylogeny species7 = factory
+                    .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2,"
+                                     + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene7 = factory
+                    .create( "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4],"
+                                     + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2],"
+                                     + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],"
+                                     + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])"
+                                     + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])",
+                             new NHXParser() )[ 0 ];
+            species7.setRooted( true );
+            gene7.setRooted( true );
+            Phylogeny[] p7 = sdi_unrooted.infer( gene7, species7, true, true, true, true, 10 );
+            if ( sdi_unrooted.getCount() != 1 ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalDuplications() != 3 ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalMappingCost() != 17 ) {
+                return false;
+            }
+            if ( !p7[ 0 ].getRoot().isDuplication() ) {
+                return false;
+            }
+            if ( !p7[ 0 ].getNode( "4-5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( !p7[ 0 ].getNode( "7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( p7[ 0 ].getNode( "1-2" ).isDuplication() ) {
+                return false;
+            }
+            if ( p7[ 0 ].getNode( "1-2-3" ).isDuplication() ) {
+                return false;
+            }
+            if ( p7[ 0 ].getNode( "5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( p7[ 0 ].getNode( "8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( p7[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            p7 = null;
+            final Phylogeny species8 = factory
+                    .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2,"
+                                     + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)",
+                             new NHXParser() )[ 0 ];
+            final Phylogeny gene8 = factory
+                    .create( "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4],"
+                                     + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2],"
+                                     + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],"
+                                     + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])"
+                                     + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])",
+                             new NHXParser() )[ 0 ];
+            species8.setRooted( true );
+            gene8.setRooted( true );
+            Phylogeny[] p8 = sdi_unrooted.infer( gene8, species8, false, false, true, true, 10 );
+            if ( sdi_unrooted.getCount() != 1 ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) {
+                return false;
+            }
+            if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalDuplications() != 3 ) {
+                return false;
+            }
+            if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) {
+                return false;
+            }
+            if ( !p8[ 0 ].getRoot().isDuplication() ) {
+                return false;
+            }
+            if ( !p8[ 0 ].getNode( "4-5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( !p8[ 0 ].getNode( "7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( p8[ 0 ].getNode( "1-2" ).isDuplication() ) {
+                return false;
+            }
+            if ( p8[ 0 ].getNode( "1-2-3" ).isDuplication() ) {
+                return false;
+            }
+            if ( p8[ 0 ].getNode( "5-6" ).isDuplication() ) {
+                return false;
+            }
+            if ( p8[ 0 ].getNode( "8-9" ).isDuplication() ) {
+                return false;
+            }
+            if ( p8[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) {
+                return false;
+            }
+            p8 = null;
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
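+    // Checks TreeSplitMatrix: whether various sets of external nodes are
+    // compatible with the splits induced by a reference tree.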
+    private static boolean testSplit() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ];
+            final Set<PhylogenyNode> ex = new HashSet<PhylogenyNode>();
+            ex.add( new PhylogenyNode( "A" ) );
+            ex.add( new PhylogenyNode( "B" ) );
+            ex.add( new PhylogenyNode( "C" ) );
+            ex.add( new PhylogenyNode( "D" ) );
+            ex.add( new PhylogenyNode( "E" ) );
+            ex.add( new PhylogenyNode( "F" ) );
+            ex.add( new PhylogenyNode( "G" ) );
+            ex.add( new PhylogenyNode( "X" ) );
+            ex.add( new PhylogenyNode( "Y" ) );
+            final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, false, ex );
+            // System.out.println( s0.toString() );
+            //
+            Set<PhylogenyNode> query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            /////////
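+            // The cases below, which add the extra taxa X and Y to otherwise matching
+            // query sets, are currently disabled: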
+            //            query_nodes = new HashSet<PhylogenyNode>();
+            //            query_nodes.add( new PhylogenyNode( "X" ) );
+            //            query_nodes.add( new PhylogenyNode( "Y" ) );
+            //            query_nodes.add( new PhylogenyNode( "A" ) );
+            //            query_nodes.add( new PhylogenyNode( "B" ) );
+            //            query_nodes.add( new PhylogenyNode( "C" ) );
+            //            query_nodes.add( new PhylogenyNode( "D" ) );
+            //            query_nodes.add( new PhylogenyNode( "E" ) );
+            //            query_nodes.add( new PhylogenyNode( "F" ) );
+            //            query_nodes.add( new PhylogenyNode( "G" ) );
+            //            if ( !s0.match( query_nodes ) ) {
+            //                return false;
+            //            }
+            //            query_nodes = new HashSet<PhylogenyNode>();
+            //            query_nodes.add( new PhylogenyNode( "X" ) );
+            //            query_nodes.add( new PhylogenyNode( "Y" ) );
+            //            query_nodes.add( new PhylogenyNode( "A" ) );
+            //            query_nodes.add( new PhylogenyNode( "B" ) );
+            //            query_nodes.add( new PhylogenyNode( "C" ) );
+            //            if ( !s0.match( query_nodes ) ) {
+            //                return false;
+            //            }
+            //            //
+            //            query_nodes = new HashSet<PhylogenyNode>();
+            //            query_nodes.add( new PhylogenyNode( "X" ) );
+            //            query_nodes.add( new PhylogenyNode( "Y" ) );
+            //            query_nodes.add( new PhylogenyNode( "D" ) );
+            //            query_nodes.add( new PhylogenyNode( "E" ) );
+            //            query_nodes.add( new PhylogenyNode( "F" ) );
+            //            query_nodes.add( new PhylogenyNode( "G" ) );
+            //            if ( !s0.match( query_nodes ) ) {
+            //                return false;
+            //            }
+            //            //
+            //            query_nodes = new HashSet<PhylogenyNode>();
+            //            query_nodes.add( new PhylogenyNode( "X" ) );
+            //            query_nodes.add( new PhylogenyNode( "Y" ) );
+            //            query_nodes.add( new PhylogenyNode( "A" ) );
+            //            query_nodes.add( new PhylogenyNode( "B" ) );
+            //            query_nodes.add( new PhylogenyNode( "C" ) );
+            //            query_nodes.add( new PhylogenyNode( "D" ) );
+            //            if ( !s0.match( query_nodes ) ) {
+            //                return false;
+            //            }
+            //            //
+            //            query_nodes = new HashSet<PhylogenyNode>();
+            //            query_nodes.add( new PhylogenyNode( "X" ) );
+            //            query_nodes.add( new PhylogenyNode( "Y" ) );
+            //            query_nodes.add( new PhylogenyNode( "E" ) );
+            //            query_nodes.add( new PhylogenyNode( "F" ) );
+            //            query_nodes.add( new PhylogenyNode( "G" ) );
+            //            if ( !s0.match( query_nodes ) ) {
+            //                return false;
+            //            }
+            //            //
+            //            query_nodes = new HashSet<PhylogenyNode>();
+            //            query_nodes.add( new PhylogenyNode( "X" ) );
+            //            query_nodes.add( new PhylogenyNode( "Y" ) );
+            //            query_nodes.add( new PhylogenyNode( "F" ) );
+            //            query_nodes.add( new PhylogenyNode( "G" ) );
+            //            if ( !s0.match( query_nodes ) ) {
+            //                return false;
+            //            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            ///////////////////////////
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "X" ) );
+            query_nodes.add( new PhylogenyNode( "Y" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testSplitStrict() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ];
+            final Set<PhylogenyNode> ex = new HashSet<PhylogenyNode>();
+            ex.add( new PhylogenyNode( "A" ) );
+            ex.add( new PhylogenyNode( "B" ) );
+            ex.add( new PhylogenyNode( "C" ) );
+            ex.add( new PhylogenyNode( "D" ) );
+            ex.add( new PhylogenyNode( "E" ) );
+            ex.add( new PhylogenyNode( "F" ) );
+            ex.add( new PhylogenyNode( "G" ) );
+            final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, true, ex );
+            Set<PhylogenyNode> query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( !s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "C" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "F" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "B" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+            //
+            query_nodes = new HashSet<PhylogenyNode>();
+            query_nodes.add( new PhylogenyNode( "E" ) );
+            query_nodes.add( new PhylogenyNode( "D" ) );
+            query_nodes.add( new PhylogenyNode( "A" ) );
+            query_nodes.add( new PhylogenyNode( "G" ) );
+            if ( s0.match( query_nodes ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testSubtreeDeletion() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t1 = factory.create( "((A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ];
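+            // Delete leaves and inner nodes one by one; after each deletion the number of
+            // external nodes is checked and the tree is re-written via toNewHampshireX().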
+            t1.deleteSubtree( t1.getNode( "A" ), false );
+            if ( t1.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            t1.toNewHampshireX();
+            t1.deleteSubtree( t1.getNode( "E" ), false );
+            if ( t1.getNumberOfExternalNodes() != 4 ) {
+                return false;
+            }
+            t1.toNewHampshireX();
+            t1.deleteSubtree( t1.getNode( "F" ), false );
+            if ( t1.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            t1.toNewHampshireX();
+            t1.deleteSubtree( t1.getNode( "D" ), false );
+            t1.toNewHampshireX();
+            if ( t1.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "def" ), false );
+            t1.toNewHampshireX();
+            if ( t1.getNumberOfExternalNodes() != 2 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "B" ), false );
+            t1.toNewHampshireX();
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "C" ), false );
+            t1.toNewHampshireX();
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "abc" ), false );
+            t1.toNewHampshireX();
+            if ( t1.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+            t1.deleteSubtree( t1.getNode( "r" ), false );
+            if ( t1.getNumberOfExternalNodes() != 0 ) {
+                return false;
+            }
+            if ( !t1.isEmpty() ) {
+                return false;
+            }
+            final Phylogeny t2 = factory.create( "(((1,2,3)A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ];
+            t2.deleteSubtree( t2.getNode( "A" ), false );
+            t2.toNewHampshireX();
+            if ( t2.getNumberOfExternalNodes() != 5 ) {
+                return false;
+            }
+            t2.deleteSubtree( t2.getNode( "abc" ), false );
+            t2.toNewHampshireX();
+            if ( t2.getNumberOfExternalNodes() != 3 ) {
+                return false;
+            }
+            t2.deleteSubtree( t2.getNode( "def" ), false );
+            t2.toNewHampshireX();
+            if ( t2.getNumberOfExternalNodes() != 1 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testSupportCount() {
+        try {
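+            // SupportCount.count() tallies, for each internal node of the first tree, in how
+            // many of the evaluator trees the corresponding split occurs; the expected counts
+            // are then read back with PhylogenyMethods.getConfidenceValue().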
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny t0_1 = factory.create( "(((A,B),C),(D,E))", new NHXParser() )[ 0 ];
+            final Phylogeny[] phylogenies_1 = factory.create( "(((A,B),C),(D,E)) " + "(((C,B),A),(D,E))"
+                                                                      + "(((A,B),C),(D,E)) " + "(((A,B),C),(D,E))"
+                                                                      + "(((A,B),C),(D,E))" + "(((C,B),A),(D,E))"
+                                                                      + "(((E,B),D),(C,A))" + "(((C,B),A),(D,E))"
+                                                                      + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))",
+                                                              new NHXParser() );
+            SupportCount.count( t0_1, phylogenies_1, true, false );
+            final Phylogeny t0_2 = factory.create( "(((((A,B),C),D),E),(F,G))", new NHXParser() )[ 0 ];
+            final Phylogeny[] phylogenies_2 = factory.create( "(((((A,B),C),D),E),(F,G))"
+                                                                      + "(((((A,B),C),D),E),((F,G),X))"
+                                                                      + "(((((A,Y),B),C),D),((F,G),E))"
+                                                                      + "(((((A,B),C),D),E),(F,G))"
+                                                                      + "(((((A,B),C),D),E),(F,G))"
+                                                                      + "(((((A,B),C),D),E),(F,G))"
+                                                                      + "(((((A,B),C),D),E),(F,G),Z)"
+                                                                      + "(((((A,B),C),D),E),(F,G))"
+                                                                      + "((((((A,B),C),D),E),F),G)"
+                                                                      + "(((((X,Y),F,G),E),((A,B),C)),D)",
+                                                              new NHXParser() );
+            SupportCount.count( t0_2, phylogenies_2, true, false );
+            final PhylogenyNodeIterator it = t0_2.iteratorPostorder();
+            while ( it.hasNext() ) {
+                final PhylogenyNode n = it.next();
+                if ( !n.isExternal() && ( PhylogenyMethods.getConfidenceValue( n ) != 10 ) ) {
+                    return false;
+                }
+            }
+            final Phylogeny t0_3 = factory.create( "(((A,B)ab,C)abc,((D,E)de,F)def)", new NHXParser() )[ 0 ];
+            final Phylogeny[] phylogenies_3 = factory.create( "(((A,B),C),((D,E),F))" + "(((A,C),B),((D,F),E))"
+                    + "(((C,A),B),((F,D),E))" + "(((A,B),F),((D,E),C))" + "(((((A,B),C),D),E),F)", new NHXParser() );
+            SupportCount.count( t0_3, phylogenies_3, true, false );
+            t0_3.reRoot( t0_3.getNode( "def" ).getId() );
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "ab" ) ) != 3 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "abc" ) ) != 4 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "def" ) ) != 4 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "de" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "A" ) ) != 5 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "B" ) ) != 5 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "C" ) ) != 5 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "D" ) ) != 5 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "E" ) ) != 5 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "F" ) ) != 5 ) {
+                return false;
+            }
+            final Phylogeny t0_4 = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ];
+            final Phylogeny[] phylogenies_4 = factory.create( "((((((A,X),C),B),D),E),F) "
+                    + "(((A,B,Z),C,Q),(((D,Y),E),F))", new NHXParser() );
+            SupportCount.count( t0_4, phylogenies_4, true, false );
+            t0_4.reRoot( t0_4.getNode( "F" ).getId() );
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "1" ) ) != 1 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "2" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "3" ) ) != 1 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "4" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "A" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "B" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "C" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "D" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "E" ) ) != 2 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "F" ) ) != 2 ) {
+                return false;
+            }
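+            // SupportCount.compare() is checked against expected agreement ratios: 5/5 for a
+            // topology that differs only in leaf order, then 4/5, 2/5 and 1/5 for
+            // increasingly different trees.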
+            Phylogeny a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ];
+            final Phylogeny b1 = factory.create( "(((((B,A)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ];
+            double d = SupportCount.compare( b1, a, true, true, true );
+            if ( !Test.isEqual( d, 5.0 / 5.0 ) ) {
+                return false;
+            }
+            a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ];
+            final Phylogeny b2 = factory.create( "(((((C,B)1,A)2,D)3,E)4,F)", new NHXParser() )[ 0 ];
+            d = SupportCount.compare( b2, a, true, true, true );
+            if ( !Test.isEqual( d, 4.0 / 5.0 ) ) {
+                return false;
+            }
+            a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ];
+            final Phylogeny b3 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)", new NHXParser() )[ 0 ];
+            d = SupportCount.compare( b3, a, true, true, true );
+            if ( !Test.isEqual( d, 2.0 / 5.0 ) ) {
+                return false;
+            }
+            a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)r", new NHXParser() )[ 0 ];
+            final Phylogeny b4 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)r", new NHXParser() )[ 0 ];
+            d = SupportCount.compare( b4, a, true, true, false );
+            if ( !Test.isEqual( d, 1.0 / 5.0 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testSupportTransfer() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p1 = factory.create( "(((A,B)ab:97,C)abc:57,((D,E)de:10,(F,G)fg:50,(H,I)hi:64)defghi)",
+                                                 new NHXParser() )[ 0 ];
+            final Phylogeny p2 = factory
+                    .create( "(((A:0.1,B:0.3)ab:0.4,C)abc:0.5,((D,E)de,(F,G)fg,(H,I)hi:0.59)defghi)", new NHXParser() )[ 0 ];
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) >= 0.0 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) >= 0.0 ) {
+                return false;
+            }
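+            // In p1 the support values are stored as branch lengths; move them into
+            // confidence values and transfer them onto p2, whose own branch lengths
+            // (0.4, 0.5, 0.59) must remain untouched.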
+            support_transfer.moveBranchLengthsToBootstrap( p1 );
+            support_transfer.transferSupportValues( p1, p2 );
+            if ( p2.getNode( "ab" ).getDistanceToParent() != 0.4 ) {
+                return false;
+            }
+            if ( p2.getNode( "abc" ).getDistanceToParent() != 0.5 ) {
+                return false;
+            }
+            if ( p2.getNode( "hi" ).getDistanceToParent() != 0.59 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) != 97 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) != 57 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "de" ) ) != 10 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "fg" ) ) != 50 ) {
+                return false;
+            }
+            if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "hi" ) ) != 64 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testTaxonomyAssigner() {
+        try {
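+            // TaxonomyAssigner.execute( gene, species ) is expected to label each named
+            // internal node of the gene tree with the taxonomy of the smallest species-tree
+            // clade containing all species found below it (e.g. {A,A,B} -> "AB", {A,A,E} -> "ABCDE").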
+            String s0_str = "(((([&&NHX:S=A],[&&NHX:S=B])[&&NHX:S=AB],[&&NHX:S=C])[&&NHX:S=ABC],[&&NHX:S=D])[&&NHX:S=ABCD],[&&NHX:S=E])[&&NHX:S=ABCDE]";
+            String g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A])a,[&&NHX:S=B])b,[&&NHX:S=C])c";
+            Phylogeny s0 = ParserBasedPhylogenyFactory.getInstance().create( s0_str, new NHXParser() )[ 0 ];
+            Phylogeny g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            s0.setRooted( true );
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "AB" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "ABC" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A])a,[&&NHX:S=A])b,[&&NHX:S=A])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=B])a,[&&NHX:S=A])b,[&&NHX:S=A])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "AB" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "AB" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "AB" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=B])a,[&&NHX:S=C])b,[&&NHX:S=A])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "AB" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABC" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "ABC" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=B])a,[&&NHX:S=C])b,[&&NHX:S=D])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "AB" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABC" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=E])a,[&&NHX:S=C])b,[&&NHX:S=D])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCDE" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCDE" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCDE" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=E])a,[&&NHX:S=A])b,[&&NHX:S=A])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCDE" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCDE" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCDE" ) ) {
+                return false;
+            }
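+            // Switch to a multifurcating species tree with four labeled groups
+            // (ABCD, EFGH, IJKL, MNOP) under a labeled ROOT.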
+            s0_str = "(([&&NHX:S=A],[&&NHX:S=B],[&&NHX:S=C],[&&NHX:S=D])[&&NHX:S=ABCD],"
+                    + "([&&NHX:S=E],[&&NHX:S=F],[&&NHX:S=G],[&&NHX:S=H])[&&NHX:S=EFGH],"
+                    + "([&&NHX:S=I],[&&NHX:S=J],[&&NHX:S=K],[&&NHX:S=L])[&&NHX:S=IJKL], "
+                    + "([&&NHX:S=M],[&&NHX:S=N],[&&NHX:S=O],[&&NHX:S=P])[&&NHX:S=MNOP])[&&NHX:S=ROOT]";
+            s0 = ParserBasedPhylogenyFactory.getInstance().create( s0_str, new NHXParser() )[ 0 ];
+            s0.setRooted( true );
+            g0_str = "(([&&NHX:S=A],[&&NHX:S=B],[&&NHX:S=C],[&&NHX:S=D])a,"
+                    + "([&&NHX:S=E],[&&NHX:S=F],[&&NHX:S=G],[&&NHX:S=H])b,"
+                    + "([&&NHX:S=I],[&&NHX:S=J],[&&NHX:S=K],[&&NHX:S=L])c, "
+                    + "([&&NHX:S=M],[&&NHX:S=N],[&&NHX:S=O],[&&NHX:S=P])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "EFGH" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "IJKL" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "MNOP" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(([&&NHX:S=A],[&&NHX:S=B],[&&NHX:S=A],[&&NHX:S=B])a,"
+                    + "([&&NHX:S=E],[&&NHX:S=F],[&&NHX:S=F],[&&NHX:S=F])b,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=I])c, "
+                    + "([&&NHX:S=M],[&&NHX:S=N],[&&NHX:S=O],[&&NHX:S=O])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "EFGH" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "IJKL" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "MNOP" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(([&&NHX:S=A],[&&NHX:S=B],[&&NHX:S=A],[&&NHX:S=B])a,"
+                    + "([&&NHX:S=E],[&&NHX:S=F],[&&NHX:S=F],[&&NHX:S=F])b,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])c, "
+                    + "([&&NHX:S=M],[&&NHX:S=N],[&&NHX:S=A],[&&NHX:S=O])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "EFGH" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])a,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])b,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])c, "
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A])a,[&&NHX:S=A])b,[&&NHX:S=A])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            g0_str = "((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=B])a,[&&NHX:S=I])b,[&&NHX:S=J])c";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(((([&&NHX:S=A],[&&NHX:S=B],[&&NHX:S=C],[&&NHX:S=D])a,"
+                    + "([&&NHX:S=D],[&&NHX:S=C],[&&NHX:S=B],[&&NHX:S=A])b)ab,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])c)abc, "
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "ab" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "abc" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=C],[&&NHX:S=D])a,"
+                    + "([&&NHX:S=D],[&&NHX:S=D],[&&NHX:S=B],[&&NHX:S=A])b)ab,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])c)abc, "
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "ab" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "abc" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=C],[&&NHX:S=D])a,"
+                    + "([&&NHX:S=D],[&&NHX:S=D],[&&NHX:S=B],[&&NHX:S=A])b)ab,"
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L])c)abc, "
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=A])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "ab" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "L" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "abc" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            g0_str = "(((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=C],[&&NHX:S=D])a,"
+                    + "([&&NHX:S=D],[&&NHX:S=D],[&&NHX:S=B],[&&NHX:S=A])b)ab,"
+                    + "([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A])c)abc, "
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=A])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( !g0.getNode( "a" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "b" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "ab" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "abc" ).getNodeData().getTaxonomy().getScientificName().equals( "ABCD" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "d" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            if ( !g0.getNode( "r" ).getNodeData().getTaxonomy().getScientificName().equals( "ROOT" ) ) {
+                return false;
+            }
+            s0_str = "(([&&NHX:S=A],[&&NHX:S=B],[&&NHX:S=C],[&&NHX:S=D]),"
+                    + "([&&NHX:S=E],[&&NHX:S=F],[&&NHX:S=G],[&&NHX:S=H]),"
+                    + "([&&NHX:S=I],[&&NHX:S=J],[&&NHX:S=K],[&&NHX:S=L]), "
+                    + "([&&NHX:S=M],[&&NHX:S=N],[&&NHX:S=O],[&&NHX:S=P]))";
+            s0 = ParserBasedPhylogenyFactory.getInstance().create( s0_str, new NHXParser() )[ 0 ];
+            s0.setRooted( true );
+            g0_str = "(((([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=C],[&&NHX:S=D])a,"
+                    + "([&&NHX:S=D],[&&NHX:S=D],[&&NHX:S=B],[&&NHX:S=A])b)ab,"
+                    + "([&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A],[&&NHX:S=A])c)abc, "
+                    + "([&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=L],[&&NHX:S=A])d)r";
+            g0 = ParserBasedPhylogenyFactory.getInstance().create( g0_str, new NHXParser() )[ 0 ];
+            g0.setRooted( true );
+            TaxonomyAssigner.execute( g0, s0 );
+            if ( g0.getNode( "a" ).getNodeData().isHasTaxonomy() ) {
+                return false;
+            }
+            if ( !g0.getNode( "c" ).getNodeData().getTaxonomy().getScientificName().equals( "A" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
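+    // Note: this test depends on network access (it queries the UniProt taxonomy
+    // service via UniProtWsTools).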
+    private static boolean testUniprotTaxonomySearch() {
+        try {
+            List<UniProtTaxonomy> results = UniProtWsTools
+                    .getTaxonomiesFromCommonNameStrict( "starlet sea anemone", 10 );
+            if ( results.size() != 1 ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCode().equals( "NEMVE" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getId().equalsIgnoreCase( "45351" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
+                return false;
+            }
+            results = null;
+            results = UniProtWsTools.getTaxonomiesFromScientificNameStrict( "Nematostella vectensis", 10 );
+            if ( results.size() != 1 ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCode().equals( "NEMVE" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getId().equalsIgnoreCase( "45351" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
+                return false;
+            }
+            results = null;
+            results = UniProtWsTools.getTaxonomiesFromId( "45351", 10 );
+            if ( results.size() != 1 ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCode().equals( "NEMVE" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getId().equalsIgnoreCase( "45351" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
+                return false;
+            }
+            results = null;
+            results = UniProtWsTools.getTaxonomiesFromTaxonomyCode( "NEMVE", 10 );
+            if ( results.size() != 1 ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCode().equals( "NEMVE" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "starlet sea anemone" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getId().equalsIgnoreCase( "45351" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getScientificName().equals( "Nematostella vectensis" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getLineage()[ 0 ].equals( "Eukaryota" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getLineage()[ 1 ].equals( "Metazoa" ) ) {
+                return false;
+            }
+            if ( !results.get( 0 ).getLineage()[ results.get( 0 ).getLineage().length - 1 ].equals( "Nematostella" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            System.out.println();
+            System.out.println( "the following might be due to the absence of an internet connection:" );
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
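+    // Note: this test depends on network access (it queries the remote TxSearch
+    // taxonomy service).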
+    private static boolean testWabiTxSearch() {
+        try {
+            String result = "";
+            result = TxSearch.searchSimple( "nematostella" );
+            result = TxSearch.getTxId( "nematostella" );
+            if ( !result.equals( "45350" ) ) {
+                return false;
+            }
+            result = TxSearch.getTxName( "45350" );
+            if ( !result.equals( "Nematostella" ) ) {
+                return false;
+            }
+            result = TxSearch.getTxId( "nematostella vectensis" );
+            if ( !result.equals( "45351" ) ) {
+                return false;
+            }
+            result = TxSearch.getTxName( "45351" );
+            if ( !result.equals( "Nematostella vectensis" ) ) {
+                return false;
+            }
+            result = TxSearch.getTxId( "Bacillus subtilis subsp. subtilis str. N170" );
+            if ( !result.equals( "536089" ) ) {
+                return false;
+            }
+            result = TxSearch.getTxName( "536089" );
+            if ( !result.equals( "Bacillus subtilis subsp. subtilis str. N170" ) ) {
+                return false;
+            }
+            final List<String> queries = new ArrayList<String>();
+            queries.add( "Campylobacter coli" );
+            queries.add( "Escherichia coli" );
+            queries.add( "Arabidopsis" );
+            queries.add( "Trichoplax" );
+            queries.add( "Samanea saman" );
+            queries.add( "Kluyveromyces marxianus" );
+            queries.add( "Bacillus subtilis subsp. subtilis str. N170" );
+            queries.add( "Bornavirus parrot/PDD/2008" );
+            final List<RANKS> ranks = new ArrayList<RANKS>();
+            ranks.add( RANKS.SUPERKINGDOM );
+            ranks.add( RANKS.KINGDOM );
+            ranks.add( RANKS.FAMILY );
+            ranks.add( RANKS.GENUS );
+            ranks.add( RANKS.TRIBE );
+            result = TxSearch.searchLineage( queries, ranks );
+            result = TxSearch.searchParam( "Homo sapiens", TAX_NAME_CLASS.ALL, TAX_RANK.SPECIES, 10, true );
+            result = TxSearch.searchParam( "Samanea saman", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, 10, true );
+        }
+        catch ( final Exception e ) {
+            System.out.println();
+            System.out.println( "the following might be due to the absence of an internet connection:" );
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testAminoAcidSequence() {
+        try {
+            final Sequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" );
+            if ( aa1.getLength() != 13 ) {
+                return false;
+            }
+            if ( aa1.getResidueAt( 0 ) != 'A' ) {
+                return false;
+            }
+            if ( aa1.getResidueAt( 2 ) != 'K' ) {
+                return false;
+            }
+            if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) {
+                return false;
+            }
+            final Sequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" );
+            if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZXXU" ) ) {
+                return false;
+            }
+            final Sequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" );
+            if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) {
+                return false;
+            }
+            final Sequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" );
+            if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testCreateBalancedPhylogeny() {
+        try {
+            final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 );
+            if ( p0.getRoot().getNumberOfDescendants() != 5 ) {
+                return false;
+            }
+            if ( p0.getNumberOfExternalNodes() != 15625 ) {
+                return false;
+            }
+            final Phylogeny p1 = DevelopmentTools.createBalancedPhylogeny( 2, 10 );
+            if ( p1.getRoot().getNumberOfDescendants() != 10 ) {
+                return false;
+            }
+            if ( p1.getNumberOfExternalNodes() != 100 ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testFastaParser() {
+        try {
+            if ( !FastaParser.isLikelyFasta( new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ) ) ) {
+                return false;
+            }
+            if ( FastaParser.isLikelyFasta( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) ) ) {
+                return false;
+            }
+            final Msa msa_0 = FastaParser.parseMsa( new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ) );
+            if ( !msa_0.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "ACGTGKXFMFDMXEXXXSFMFMF" ) ) {
+                return false;
+            }
+            if ( !msa_0.getIdentifier( 0 ).equals( "one dumb" ) ) {
+                return false;
+            }
+            if ( !msa_0.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "DKXASDFXSFXFKFKSXDFKSLX" ) ) {
+                return false;
+            }
+            if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPRXWXERR" ) ) {
+                return false;
+            }
+            if ( !msa_0.getSequenceAsString( 3 ).toString().equalsIgnoreCase( "AAAAAAAAAAAAAAAAAAAAAAA" ) ) {
+                return false;
+            }
+            if ( !msa_0.getSequenceAsString( 4 ).toString().equalsIgnoreCase( "DDDDDDDDDDDDDDDDDDDDAXF" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testGeneralMsaParser() {
+        try {
+            final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n";
+            final Msa msa_0 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_0.getBytes() ) );
+            final String msa_str_1 = "seq_1 abc\nseq2 ghi\nseq_1 def\nseq2 jkm\n";
+            final Msa msa_1 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_1.getBytes() ) );
+            final String msa_str_2 = "seq1 abc\nseq2 ghi\n\ndef\njkm\n";
+            final Msa msa_2 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_2.getBytes() ) );
+            final String msa_str_3 = "seq1 abc\n def\nseq2 ghi\n jkm\n";
+            final Msa msa_3 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_3.getBytes() ) );
+            final Msa msa_4 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) );
+            if ( !msa_4.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) {
+                return false;
+            }
+            if ( !msa_4.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "efghixffffffffyy" ) ) {
+                return false;
+            }
+            if ( !msa_4.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxphhhhhhhhzz" ) ) {
+                return false;
+            }
+            final Msa msa_5 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_2.txt" ) );
+            if ( !msa_5.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefxx" ) ) {
+                return false;
+            }
+            if ( !msa_5.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "efghixyy" ) ) {
+                return false;
+            }
+            if ( !msa_5.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxpzz" ) ) {
+                return false;
+            }
+            final Msa msa_6 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) );
+            if ( !msa_6.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) {
+                return false;
+            }
+            if ( !msa_6.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "efghixffffffffyy" ) ) {
+                return false;
+            }
+            if ( !msa_6.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxphhhhhhhhzz" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
+    private static boolean testMafft() {
+        try {
+            final List<String> opts = new ArrayList<String>();
+            opts.add( "--maxiterate" );
+            opts.add( "1000" );
+            opts.add( "--localpair" );
+            opts.add( "--quiet" );
+            Msa msa = null;
+            final MsaInferrer mafft = Mafft.createInstance();
+            msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi.fasta" ), opts );
+            if ( ( msa == null ) || ( msa.getLength() < 10 ) || ( msa.getNumberOfSequences() != 19 ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+}
diff --git a/forester/java/src/org/forester/tools/ConfidenceAssessor.java b/forester/java/src/org/forester/tools/ConfidenceAssessor.java
new file mode 100644 (file)
index 0000000..7fc7c90
--- /dev/null
@@ -0,0 +1,178 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.tools;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+public final class ConfidenceAssessor {
+
+    private ConfidenceAssessor() {
+        // Hidden constructor.
+    }
+
+    private final static void addExternalNodesToMap( final Map<PhylogenyNode, Set<PhylogenyNode>> node_to_ext_nodes_map,
+                                                     final PhylogenyNode node ) {
+        final Set<PhylogenyNode> ex_descs = new HashSet<PhylogenyNode>();
+        for( final PhylogenyNode n : node.getAllExternalDescendants() ) {
+            if ( ex_descs.contains( n ) ) {
+                throw new IllegalArgumentException( "node [" + n.toString() + "] of target is not unique" );
+            }
+            ex_descs.add( n );
+        }
+        node_to_ext_nodes_map.put( node, ex_descs );
+    }
+
+    private final static void checkPreconditions( final String confidence_type,
+                                                  final Phylogeny[] evaluators,
+                                                  final Phylogeny target,
+                                                  final double value,
+                                                  final int first,
+                                                  final int last ) {
+        if ( ( first < 0 ) || ( last < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to set the first or last evaluator topology to a number less than zero" );
+        }
+        if ( evaluators.length < 1 ) {
+            throw new IllegalArgumentException( "need at least one evaluator topology" );
+        }
+        if ( ForesterUtil.isEmpty( confidence_type ) ) {
+            throw new IllegalArgumentException( "attempt to use empty confidence type" );
+        }
+        if ( value <= 0 ) {
+            throw new IllegalArgumentException( "attempt to use zero or negative \'count value\'" );
+        }
+        if ( ( first != 0 ) || ( last != 0 ) ) {
+            if ( ( last >= evaluators.length ) || ( last <= first ) ) {
+                throw new IllegalArgumentException( "illegal value for last evaluator topology to use" );
+            }
+        }
+        final Set<PhylogenyNode> nodes = new HashSet<PhylogenyNode>();
+        for( final PhylogenyNodeIterator it = target.iteratorPostorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            if ( nodes.contains( node ) ) {
+                throw new IllegalArgumentException( "node [" + node + "] in target is not unique" );
+            }
+            nodes.add( node );
+            final List<Confidence> confidences = node.getBranchData().getConfidences();
+            for( final Confidence confidence : confidences ) {
+                if ( confidence.getType().equals( confidence_type ) ) {
+                    throw new IllegalArgumentException( "confidence [" + confidence_type
+                            + "] is already present in target" );
+                }
+            }
+        }
+    }
+
+    public final static void evaluate( final String confidence_type,
+                                       final Phylogeny[] evaluators,
+                                       final Phylogeny target,
+                                       final boolean strict,
+                                       final double value ) {
+        evaluate( confidence_type, evaluators, target, strict, value, 0, 0 );
+    }
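+    // Illustrative usage (variable names are hypothetical): add a confidence value of
+    // type "bootstrap" to each internal branch of 'target', incrementing it by 1.0 for
+    // every topology in 'evaluators' that supports the same split:
+    //
+    //     ConfidenceAssessor.evaluate( "bootstrap", evaluators, target, true, 1.0 );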
+
+    public final static void evaluate( final String confidence_type,
+                                       final Phylogeny[] evaluators,
+                                       final Phylogeny target,
+                                       final boolean strict,
+                                       final double value,
+                                       final int first,
+                                       final int last ) {
+        checkPreconditions( confidence_type, evaluators, target, value, first, last );
+        boolean all = true;
+        if ( ( first != 0 ) || ( last != 0 ) ) {
+            all = false;
+        }
+        int counter = 0;
+        final Map<PhylogenyNode, Set<PhylogenyNode>> node_to_ext_nodes_map = new HashMap<PhylogenyNode, Set<PhylogenyNode>>();
+        for( final Phylogeny evaluator : evaluators ) {
+            if ( all || ( ( counter >= first ) && ( counter <= last ) ) ) {
+                if ( strict ) {
+                    if ( evaluator.getNumberOfExternalNodes() != target.getNumberOfExternalNodes() ) {
+                        throw new IllegalArgumentException( "evaluator #" + counter
+                                + " does not have the same number of external nodes ["
+                                + evaluator.getNumberOfExternalNodes() + "] as the corresponding target ["
+                                + target.getNumberOfExternalNodes() + "]" );
+                    }
+                }
+                final TreeSplitMatrix s = new TreeSplitMatrix( evaluator, strict, target );
+                for( final PhylogenyNodeIterator it = target.iteratorPostorder(); it.hasNext(); ) {
+                    final PhylogenyNode node = it.next();
+                    if ( !node.isExternal() && !node.isRoot() ) {
+                        if ( node.getParent().isRoot()
+                                && ( target.getRoot().getNumberOfDescendants() == 2 )
+                                && ( target.getRoot().getChildNode1().isExternal() || target.getRoot().getChildNode2()
+                                        .isExternal() ) ) {
+                            continue;
+                        }
+                        if ( !node_to_ext_nodes_map.containsKey( node ) ) {
+                            addExternalNodesToMap( node_to_ext_nodes_map, node );
+                        }
+                        final Set<PhylogenyNode> ex_descs = node_to_ext_nodes_map.get( node );
+                        final Confidence c = ConfidenceAssessor.obtainConfidence( node, confidence_type );
+                        if ( s.match( ex_descs ) ) {
+                            c.setValue( c.getValue() + value );
+                        }
+                    }
+                }
+            }
+            ++counter;
+        }
+    }
+
+    private final static Confidence obtainConfidence( final PhylogenyNode n, final String confidence_type ) {
+        final List<Confidence> confidences = n.getBranchData().getConfidences();
+        Confidence match = null;
+        for( final Confidence confidence : confidences ) {
+            if ( confidence.getType().equals( confidence_type ) ) {
+                if ( match != null ) {
+                    throw new IllegalArgumentException( "confidence [" + confidence_type + "] is not unique" );
+                }
+                match = confidence;
+            }
+        }
+        if ( match == null ) {
+            match = new Confidence( 0, confidence_type );
+            confidences.add( match );
+        }
+        return match;
+    }
+}
diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java
new file mode 100644 (file)
index 0000000..086e51a
--- /dev/null
@@ -0,0 +1,525 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.io.parsers.nhx.NHXFormatException;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
+import org.forester.phylogeny.data.DomainArchitecture;
+import org.forester.phylogeny.data.Identifier;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.BasicTable;
+import org.forester.util.BasicTableParser;
+import org.forester.util.ForesterUtil;
+
+public final class PhylogenyDecorator {
+
+    // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
+    final private static String  TP_TAXONOMY_CODE                   = "TAXONOMY_CODE";
+    final private static String  TP_TAXONOMY_ID                     = "TAXONOMY_ID";
+    final private static String  TP_TAXONOMY_ID_PROVIDER            = "TAXONOMY_ID_PROVIDER";
+    final private static String  TP_TAXONOMY_SN                     = "TAXONOMY_SN";
+    final private static String  TP_TAXONOMY_CN                     = "TAXONOMY_CN";
+    final private static String  TP_TAXONOMY_SYN                    = "TAXONOMY_SYN";
+    final private static String  TP_SEQ_SYMBOL                      = "SEQ_SYMBOL";
+    final private static String  TP_SEQ_ACCESSION                   = "SEQ_ACCESSION";
+    final private static String  TP_SEQ_ACCESSION_SOURCE            = "SEQ_ACCESSION_SOURCE";
+    final private static String  TP_SEQ_ANNOTATION_DESC             = "SEQ_ANNOTATION_DESC";
+    final private static String  TP_SEQ_ANNOTATION_REF              = "SEQ_ANNOTATION_REF";
+    final private static String  TP_SEQ_MOL_SEQ                     = "SEQ_MOL_SEQ";
+    final private static String  TP_SEQ_NAME                        = "SEQ_NAME";
+    final private static String  TP_NODE_NAME                       = "NODE_NAME";
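+    // Node names assumed to be of the form <sequence number>_<taxonomy code><domain number>,
+    // e.g. "123_NEMVE1" (illustrative example); used by moveDomainNumbersAtEnd() below.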
+    final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern
+                                                                            .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" );
+    public final static boolean  SANITIZE                           = false;
+    public final static boolean  VERBOSE                            = true;
+
+    private PhylogenyDecorator() {
+        // Not needed.
+    }
+
+    public static void decorate( final Phylogeny phylogeny,
+                                 final Map<String, Map<String, String>> map,
+                                 final boolean picky,
+                                 final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
+            throws IllegalArgumentException {
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final String name = node.getName();
+            if ( !ForesterUtil.isEmpty( name ) ) {
+                if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
+                    Map<String, String> new_values = map.get( name );
+                    int x = 0;
+                    while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
+                            && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
+                        new_values = map.get( name.substring( 0, name.length() - x ) );
+                        ++x;
+                    }
+                    if ( new_values != null ) {
+                        if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
+                            ForesterUtil.ensurePresenceOfTaxonomy( node );
+                            node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
+                        }
+                        if ( new_values.containsKey( TP_TAXONOMY_ID )
+                                && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
+                            ForesterUtil.ensurePresenceOfTaxonomy( node );
+                            node.getNodeData().getTaxonomy().setIdentifier( new Identifier( new_values
+                                    .get( TP_TAXONOMY_ID ), new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
+                        }
+                        else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
+                            ForesterUtil.ensurePresenceOfTaxonomy( node );
+                            node.getNodeData().getTaxonomy().setIdentifier( new Identifier( new_values
+                                    .get( TP_TAXONOMY_ID ) ) );
+                        }
+                        if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
+                            ForesterUtil.ensurePresenceOfTaxonomy( node );
+                            node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
+                        }
+                        if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
+                            ForesterUtil.ensurePresenceOfTaxonomy( node );
+                            node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
+                        }
+                        if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
+                            ForesterUtil.ensurePresenceOfTaxonomy( node );
+                            node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
+                        }
+                        if ( new_values.containsKey( TP_SEQ_ACCESSION )
+                                && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
+                            ForesterUtil.ensurePresenceOfSequence( node );
+                            node.getNodeData().getSequence().setAccession( new Accession( new_values
+                                    .get( TP_SEQ_ACCESSION ), new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
+                        }
+                        if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
+                            ForesterUtil.ensurePresenceOfSequence( node );
+                            final Annotation ann = new Annotation( "?" );
+                            ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
+                            node.getNodeData().getSequence().addAnnotation( ann );
+                        }
+                        if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
+                            ForesterUtil.ensurePresenceOfSequence( node );
+                            final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
+                            node.getNodeData().getSequence().addAnnotation( ann );
+                        }
+                        if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
+                            ForesterUtil.ensurePresenceOfSequence( node );
+                            node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
+                        }
+                        if ( new_values.containsKey( TP_SEQ_NAME ) ) {
+                            ForesterUtil.ensurePresenceOfSequence( node );
+                            node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
+                        }
+                        if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
+                            ForesterUtil.ensurePresenceOfSequence( node );
+                            node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
+                        }
+                        if ( new_values.containsKey( TP_NODE_NAME ) ) {
+                            node.setName( new_values.get( TP_NODE_NAME ) );
+                        }
+                    }
+                }
+                else if ( picky ) {
+                    throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
+                }
+            }
+        }
+    }
+
+    /**
+     * Decorates the nodes of a phylogeny with new values taken from a map of
+     * node names to values.
+     *
+     * @param phylogeny
+     * @param map
+     *            maps names (in phylogeny) to new values
+     * @param field
+     *            the node data field the new values are written to
+     * @param picky
+     * @throws IllegalArgumentException
+     * @throws NHXFormatException
+     */
+    public static void decorate( final Phylogeny phylogeny,
+                                 final Map<String, String> map,
+                                 final FIELD field,
+                                 final boolean extract_bracketed_scientific_name,
+                                 final boolean picky,
+                                 final boolean cut_name_after_space,
+                                 final boolean process_name_intelligently,
+                                 final boolean process_similar_to,
+                                 final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                 final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
+            NHXFormatException {
+        PhylogenyDecorator.decorate( phylogeny,
+                                     map,
+                                     field,
+                                     extract_bracketed_scientific_name,
+                                     picky,
+                                     null,
+                                     cut_name_after_space,
+                                     process_name_intelligently,
+                                     process_similar_to,
+                                     numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                     move_domain_numbers_at_end_to_middle );
+    }
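+    // Illustrative usage (variable names are hypothetical): rename the nodes of a
+    // phylogeny from a simple name-to-new-name map, with no bracketed scientific name
+    // extraction and none of the optional name post-processing:
+    //
+    //     PhylogenyDecorator.decorate( phylogeny, name_map, FIELD.NODE_NAME, false,
+    //                                  true, false, false, false, 0, false );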
+
+    /**
+     * Decorates the nodes of a phylogeny with new values, optionally resolving
+     * node names through an intermediate map first.
+     *
+     * @param phylogeny
+     * @param map
+     *            maps names (in phylogeny) to new values if intermediate_map is
+     *            null, otherwise maps intermediate values to new values
+     * @param field
+     * @param picky
+     * @param intermediate_map
+     *            maps a name (in phylogeny) to an intermediate value
+     * @throws IllegalArgumentException
+     */
+    public static void decorate( final Phylogeny phylogeny,
+                                 final Map<String, String> map,
+                                 final FIELD field,
+                                 final boolean extract_bracketed_scientific_name,
+                                 final boolean picky,
+                                 final Map<String, String> intermediate_map,
+                                 final boolean cut_name_after_space,
+                                 final boolean process_name_intelligently,
+                                 final boolean process_similar_to,
+                                 final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                 final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException {
+        if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
+            throw new IllegalArgumentException( "Attempt to extract bracketed scientific name together with data field pointing to scientific name" );
+        }
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            String name = node.getName();
+            if ( !ForesterUtil.isEmpty( name ) ) {
+                if ( intermediate_map != null ) {
+                    name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
+                }
+                if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
+                    String new_value = map.get( name );
+                    int x = 0;
+                    while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
+                            && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
+                        new_value = map.get( name.substring( 0, name.length() - x ) );
+                        ++x;
+                    }
+                    if ( new_value != null ) {
+                        new_value = new_value.trim();
+                        new_value = new_value.replaceAll( "\\s+", " " ); // collapse runs of whitespace
+                        if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
+                            extractBracketedScientificNames( node, new_value );
+                        }
+                        switch ( field ) {
+                            case SEQUENCE_ANNOTATION_DESC:
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.println( name + ": " + new_value );
+                                }
+                                if ( !node.getNodeData().isHasSequence() ) {
+                                    node.getNodeData().setSequence( new Sequence() );
+                                }
+                                final Annotation annotation = new Annotation( "?" );
+                                annotation.setDesc( new_value );
+                                node.getNodeData().getSequence().addAnnotation( annotation );
+                                break;
+                            case DOMAIN_STRUCTURE:
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.println( name + ": " + new_value );
+                                }
+                                if ( !node.getNodeData().isHasSequence() ) {
+                                    node.getNodeData().setSequence( new Sequence() );
+                                }
+                                node.getNodeData().getSequence()
+                                        .setDomainArchitecture( new DomainArchitecture( new_value ) );
+                                break;
+                            case TAXONOMY_CODE:
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.println( name + ": " + new_value );
+                                }
+                                ForesterUtil.ensurePresenceOfTaxonomy( node );
+                                node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
+                                break;
+                            case TAXONOMY_SCIENTIFIC_NAME:
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.println( name + ": " + new_value );
+                                }
+                                ForesterUtil.ensurePresenceOfTaxonomy( node );
+                                node.getNodeData().getTaxonomy().setScientificName( new_value );
+                                break;
+                            case SEQUENCE_NAME:
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.println( name + ": " + new_value );
+                                }
+                                if ( !node.getNodeData().isHasSequence() ) {
+                                    node.getNodeData().setSequence( new Sequence() );
+                                }
+                                node.getNodeData().getSequence().setName( new_value );
+                                break;
+                            case NODE_NAME:
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.print( name + " -> " );
+                                }
+                                if ( cut_name_after_space ) {
+                                    if ( PhylogenyDecorator.VERBOSE ) {
+                                        System.out.print( new_value + " -> " );
+                                    }
+                                    new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
+                                }
+                                else if ( process_name_intelligently ) {
+                                    if ( PhylogenyDecorator.VERBOSE ) {
+                                        System.out.print( new_value + " -> " );
+                                    }
+                                    new_value = PhylogenyDecorator.processNameIntelligently( new_value );
+                                }
+                                else if ( process_similar_to ) {
+                                    if ( PhylogenyDecorator.VERBOSE ) {
+                                        System.out.print( new_value + " -> " );
+                                    }
+                                    new_value = PhylogenyDecorator.processSimilarTo( new_value );
+                                }
+                                if ( PhylogenyDecorator.SANITIZE ) {
+                                    new_value = PhylogenyDecorator.sanitize( new_value );
+                                }
+                                if ( PhylogenyDecorator.VERBOSE ) {
+                                    System.out.println( new_value );
+                                }
+                                node.setName( new_value );
+                                break;
+                            default:
+                                throw new RuntimeException( "unknown field \"" + field + "\"" );
+                        }
+                        if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) {
+                            node.setName( moveDomainNumbersAtEnd( node.getName() ) );
+                        }
+                    }
+                }
+                else if ( picky ) {
+                    throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
+                }
+            }
+        }
+    }
+
+    public static void decorate( final Phylogeny[] phylogenies,
+                                 final Map<String, Map<String, String>> map,
+                                 final boolean picky,
+                                 final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
+            throws IllegalArgumentException, NHXFormatException {
+        for( int i = 0; i < phylogenies.length; ++i ) {
+            PhylogenyDecorator.decorate( phylogenies[ i ],
+                                         map,
+                                         picky,
+                                         numbers_of_chars_allowed_to_remove_if_not_found_in_map );
+        }
+    }
+
+    public static void decorate( final Phylogeny[] phylogenies,
+                                 final Map<String, String> map,
+                                 final FIELD field,
+                                 final boolean extract_bracketed_scientific_name,
+                                 final boolean picky,
+                                 final boolean cut_name_after_space,
+                                 final boolean process_name_intelligently,
+                                 final boolean process_similar_to,
+                                 final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                 final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
+            NHXFormatException {
+        for( int i = 0; i < phylogenies.length; ++i ) {
+            PhylogenyDecorator.decorate( phylogenies[ i ],
+                                         map,
+                                         field,
+                                         extract_bracketed_scientific_name,
+                                         picky,
+                                         cut_name_after_space,
+                                         process_name_intelligently,
+                                         process_similar_to,
+                                         numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                         move_domain_numbers_at_end_to_middle );
+        }
+    }
+
+    public static void decorate( final Phylogeny[] phylogenies,
+                                 final Map<String, String> map,
+                                 final FIELD field,
+                                 final boolean extract_bracketed_scientific_name,
+                                 final boolean picky,
+                                 final Map<String, String> intermediate_map,
+                                 final boolean cut_name_after_space,
+                                 final boolean process_name_intelligently,
+                                 final boolean process_similar_to,
+                                 final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                 final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
+            NHXFormatException {
+        for( int i = 0; i < phylogenies.length; ++i ) {
+            PhylogenyDecorator.decorate( phylogenies[ i ],
+                                         map,
+                                         field,
+                                         extract_bracketed_scientific_name,
+                                         picky,
+                                         intermediate_map,
+                                         cut_name_after_space,
+                                         process_name_intelligently,
+                                         process_similar_to,
+                                         numbers_of_chars_allowed_to_remove_if_not_found_in_map,
+                                         move_domain_numbers_at_end_to_middle );
+        }
+    }
+
+    private static String deleteAtFirstSpace( final String name ) {
+        final int first_space = name.indexOf( " " );
+        if ( first_space > 1 ) {
+            return name.substring( 0, first_space ).trim();
+        }
+        return name;
+    }
+
+    private static void extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
+        final int i = new_value.lastIndexOf( "[" );
+        final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
+        ForesterUtil.ensurePresenceOfTaxonomy( node );
+        node.getNodeData().getTaxonomy().setScientificName( scientific_name );
+    }
+
+    private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
+        String new_name = null;
+        if ( PhylogenyDecorator.VERBOSE ) {
+            System.out.print( name + " => " );
+        }
+        if ( intermediate_map.containsKey( name ) ) {
+            new_name = intermediate_map.get( name );
+            if ( ForesterUtil.isEmpty( new_name ) ) {
+                throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
+            }
+        }
+        else {
+            throw new IllegalArgumentException( "\"" + name + "\" not found in secondary name map" );
+        }
+        if ( PhylogenyDecorator.VERBOSE ) {
+            System.out.println( new_name + "  " );
+        }
+        return new_name;
+    }
+
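+    // Moves a trailing domain number into the middle of a node name, e.g. (illustrative)
+    // "123_NEMVE1" -> "123_[1]_NEMVE"; names not matching
+    // NODENAME_SEQNUMBER_TAXDOMAINNUMBER are returned unchanged.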
+    private static String moveDomainNumbersAtEnd( final String node_name ) {
+        final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name );
+        if ( m.matches() ) {
+            final String seq_number = m.group( 1 );
+            final String tax = m.group( 2 );
+            final String domain_number = m.group( 3 );
+            return seq_number + "_[" + domain_number + "]_" + tax;
+        }
+        else {
+            return node_name;
+        }
+    }
+
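+    // Parses a tab-separated mapping table in which the first column is the node name
+    // and every further cell has the form KEY:value (illustrative example:
+    // "TAXONOMY_CODE:NEMVE"); the keys correspond to the TP_ constants above.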
+    public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
+            throws IOException {
+        final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
+        BasicTable<String> mapping_table = null;
+        mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false );
+        for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
+            final Map<String, String> row_map = new HashMap<String, String>();
+            String name = null;
+            for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
+                final String table_cell = mapping_table.getValue( col, row );
+                if ( col == 0 ) {
+                    name = table_cell;
+                }
+                else if ( table_cell != null ) {
+                    final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
+                    final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
+                    row_map.put( key, val );
+                }
+            }
+            map.put( name, row_map );
+        }
+        return map;
+    }
+
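+    // Heuristically picks the most identifier-like token of a whitespace-separated name:
+    // prefers the first or second token containing both '_' and '|', then both '_' and '.',
+    // then '_' alone, and finally falls back to the first token.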
+    private static String processNameIntelligently( final String name ) {
+        final String[] s = name.split( " " );
+        if ( s.length < 2 ) {
+            return name;
+        }
+        else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
+            return s[ 0 ];
+        }
+        else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
+            return s[ 1 ];
+        }
+        else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
+            return s[ 0 ];
+        }
+        else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
+            return s[ 1 ];
+        }
+        else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
+            return s[ 0 ];
+        }
+        else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
+            return s[ 1 ];
+        }
+        else {
+            return s[ 0 ];
+        }
+    }
+
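+    // If the name contains "similar to", appends the remainder as " similarity=..." to the
+    // result of processNameIntelligently(); otherwise identical to processNameIntelligently().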
+    private static String processSimilarTo( final String name ) {
+        final int i = name.toLowerCase().indexOf( "similar to" );
+        String similar_to = "";
+        if ( i >= 0 ) {
+            similar_to = " similarity=" + name.substring( i + 10 ).trim();
+        }
+        final String pi = processNameIntelligently( name );
+        return pi + similar_to;
+    }
+
+    private static String sanitize( String s ) {
+        s = s.replace( ' ', '_' );
+        s = s.replace( '(', '{' );
+        s = s.replace( ')', '}' );
+        s = s.replace( '[', '{' );
+        s = s.replace( ']', '}' );
+        s = s.replace( ',', '_' );
+        return s;
+    }
+
+    public static enum FIELD {
+        NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
+    }
+}
diff --git a/forester/java/src/org/forester/tools/SupportCount.java b/forester/java/src/org/forester/tools/SupportCount.java
new file mode 100644 (file)
index 0000000..a3c7fa8
--- /dev/null
@@ -0,0 +1,250 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.tools;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+/*
+ * A simple class containing static methods to evaluate the topology of a given
+ * phylogeny against a list of resampled phylogenies.
+ *
+ * @author Christian M Zmasek
+ */
+public final class SupportCount {
+
+    private SupportCount() {
+    }
+
+    public static double compare( final Phylogeny phylogeny,
+                                  final Phylogeny evaluator_phylogeny,
+                                  final boolean strip_evaluator_phylogeny,
+                                  final boolean update_support_in_phylogeny,
+                                  final boolean re_root ) {
+        String[] seq_names_to_keep = null;
+        if ( strip_evaluator_phylogeny ) {
+            seq_names_to_keep = phylogeny.getAllExternalNodeNames();
+            SupportCount.strip( seq_names_to_keep, evaluator_phylogeny );
+        }
+        if ( re_root ) {
+            final String child0_name = phylogeny.getFirstExternalNode().getName();
+            phylogeny.reRoot( phylogeny.getNode( child0_name ) );
+            evaluator_phylogeny.reRoot( evaluator_phylogeny.getNode( child0_name ) );
+        }
+        final Map<Integer, ArrayList<String>> phylogeny_external_names_per_node = SupportCount
+                .extractExternalNamesPerNode( phylogeny );
+        return ( SupportCount.compare( phylogeny,
+                                       evaluator_phylogeny,
+                                       phylogeny_external_names_per_node,
+                                       update_support_in_phylogeny,
+                                       -1 ) );
+    }
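+    // Illustrative usage (variable names are hypothetical): compare 'phylogeny' with a single
+    // resampled topology, stripping the evaluator down to the external nodes of 'phylogeny',
+    // re-rooting both on the same external node, and writing support values back:
+    //
+    //     final double similarity = SupportCount.compare( phylogeny, evaluator, true, true, true );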
+
+    /**
+     * 
+     * Precondition: phylogeny and evaluator_phylogeny have to be rooted in the
+     * same manner.
+     * 
+     * Returns a measure of the similarity ("average bootstrap similarity")
+     * between the topologies of phylogeny and evaluator_phylogeny: (number of
+     * branches which divide phylogeny in a manner consistent with
+     * evaluator_phylogeny) / (number of branches in phylogeny). Therefore, this
+     * measure is 1.0 for identical topologies and 0.0 for completely
+     * incompatible topologies.
+     * 
+     * 
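+     * For example, if 7 of the 10 internal branches of phylogeny divide it in
+     * a manner consistent with evaluator_phylogeny, the returned similarity is
+     * 7 / 10 = 0.7.
+     * 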
+     * @param phylogeny
+     * @param evaluator_phylogeny
+     * @param external_names_per_node
+     * @param update_support_in_phylogeny
+     *            set to true to update support values in phylogeny; otherwise,
+     *            only the "average bootstrap similarity" is calculated
+     * @return a measure of the similarity ("average bootstrap similarity")
+     *         between phylogeny and evaluator_phylogeny
+     */
+    private static double compare( final Phylogeny phylogeny,
+                                   final Phylogeny evaluator_phylogeny,
+                                   final Map<Integer, ArrayList<String>> phylogeny_external_names_per_node,
+                                   final boolean update_support_in_phylogeny,
+                                   final double similarity_threshold ) {
+        int matching_branches = 0;
+        int phylogeny_total_internal_branches = 0;
+        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+            if ( !it.next().isExternal() ) {
+                ++phylogeny_total_internal_branches;
+            }
+        }
+        final Map<PhylogenyNode, Double> support_values = new HashMap<PhylogenyNode, Double>();
+        E: for( final PhylogenyNodeIterator evaluator_phylogeny_it = evaluator_phylogeny.iteratorPostorder(); evaluator_phylogeny_it
+                .hasNext(); ) {
+            final List<String> c1 = new ArrayList<String>();
+            for( final Object element : evaluator_phylogeny_it.next().getAllExternalDescendants() ) {
+                c1.add( ( ( PhylogenyNode ) element ).getName() );
+            }
+            for( final Integer id : phylogeny_external_names_per_node.keySet() ) {
+                final List<String> c2 = phylogeny_external_names_per_node.get( id );
+                if ( ( c2.size() == c1.size() ) && c2.containsAll( c1 ) ) {
+                    if ( c2.size() > 1 ) {
+                        matching_branches++;
+                    }
+                    if ( update_support_in_phylogeny ) {
+                        final PhylogenyNode node = phylogeny.getNode( id.intValue() );
+                        double d = PhylogenyMethods.getConfidenceValue( node );
+                        if ( d < 1.0 ) {
+                            d = 1.0;
+                        }
+                        else {
+                            ++d;
+                        }
+                        support_values.put( node, new Double( d ) );
+                    }
+                    continue E;
+                }
+            }
+        }
+        final double similarity = ( double ) matching_branches / phylogeny_total_internal_branches;
+        if ( ( similarity_threshold < 0.0 ) || ( similarity >= similarity_threshold ) ) {
+            for( final PhylogenyNode node : support_values.keySet() ) {
+                double b = support_values.get( node ).doubleValue();
+                if ( b < 0 ) {
+                    b = 0.0;
+                }
+                PhylogenyMethods.setBootstrapConfidence( node, b );
+            }
+        }
+        return similarity;
+    }
+
+    public static void count( final Phylogeny phylogeny,
+                              final Phylogeny[] evaluator_phylogenies,
+                              final boolean strip_evaluator_phylogenies,
+                              final boolean verbose ) {
+        SupportCount.count( phylogeny, evaluator_phylogenies, strip_evaluator_phylogenies, -1, verbose );
+    }
+
+    /**
+     * Counts the support for the topology of phylogeny by the topologies in
+     * evaluator_phylogenies. If evaluator_phylogenies contains topologies with
+     * names not present in phylogeny, strip_evaluator_phylogenies must be set
+     * to true. phylogeny must not contain names which are not found in all
+     * evaluator_phylogenies.
+     * 
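+     * Example (a hedged usage sketch; 'target' and 'evaluators' are assumed to
+     * be a Phylogeny and a Phylogeny[] obtained elsewhere, the names are
+     * illustrative only):
+     * <pre>
+     * final List&lt;Phylogeny&gt; above_threshold = SupportCount.count( target, evaluators, true, 0.5, true );
+     * </pre>
+     * 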
+     * @param phylogeny
+     *            the topology to be evaluated
+     * @param evaluator_phylogenies
+     *            the topologies used for evaluation
+     * @param strip_evaluator_phylogenies
+     *            set to true if evaluator_phylogenies contains topologies with
+     *            names not present in phylogeny
+     */
+    public static List<Phylogeny> count( final Phylogeny phylogeny,
+                                         final Phylogeny[] evaluator_phylogenies,
+                                         final boolean strip_evaluator_phylogenies,
+                                         final double similarity_threshold,
+                                         final boolean verbose ) {
+        String[] seq_names_to_keep = null;
+        final List<Phylogeny> evaluator_phylogenies_above_threshold = new ArrayList<Phylogeny>();
+        if ( strip_evaluator_phylogenies ) {
+            seq_names_to_keep = phylogeny.getAllExternalNodeNames();
+        }
+        final String child0_name = phylogeny.getFirstExternalNode().getName();
+        phylogeny.reRoot( phylogeny.getNode( child0_name ) );
+        final Map<Integer, ArrayList<String>> phylogeny_external_names_per_node = SupportCount
+                .extractExternalNamesPerNode( phylogeny );
+        if ( verbose ) {
+            System.out.println();
+            System.out.println( "evaluator phylogeny #: similarity score (max is 1.0)" );
+            System.out.println( "----------------------------------------------------" );
+            System.out.println();
+        }
+        for( int i = 0; i < evaluator_phylogenies.length; ++i ) {
+            final Phylogeny evaluator_phylogeny = evaluator_phylogenies[ i ];
+            evaluator_phylogeny.reRoot( evaluator_phylogeny.getNode( child0_name ) );
+            Phylogeny unstripped_evaluator_phylogeny = evaluator_phylogeny;
+            if ( strip_evaluator_phylogenies ) {
+                unstripped_evaluator_phylogeny = evaluator_phylogeny.copy();
+                SupportCount.strip( seq_names_to_keep, evaluator_phylogeny );
+                // Ordering the phylogeny makes comparison easier if the
+                // phylogenies are saved to file.
+                evaluator_phylogeny.orderAppearance( true );
+            }
+            final double s = SupportCount.compare( phylogeny,
+                                                   evaluator_phylogenies[ i ],
+                                                   phylogeny_external_names_per_node,
+                                                   true,
+                                                   similarity_threshold );
+            if ( ( similarity_threshold < 0.0 ) || ( s >= similarity_threshold ) ) {
+                unstripped_evaluator_phylogeny.orderAppearance( true );
+                evaluator_phylogenies_above_threshold.add( unstripped_evaluator_phylogeny );
+            }
+            if ( verbose ) {
+                if ( similarity_threshold < 0.0 ) {
+                    System.out.println( i + ": " + s );
+                }
+                else if ( s >= similarity_threshold ) {
+                    System.out.println( i + ": " + s + " <====" );
+                }
+                else {
+                    System.out.println( i + ": " + s );
+                }
+            }
+        }
+        if ( verbose ) {
+            System.out.println( "----------------------------------------------------" );
+            System.out.println();
+        }
+        return evaluator_phylogenies_above_threshold;
+    }
+
+    private static Map<Integer, ArrayList<String>> extractExternalNamesPerNode( final Phylogeny phylogeny )
+            throws NoSuchElementException {
+        final HashMap<Integer, ArrayList<String>> phylogeny_external_names_per_node = new HashMap<Integer, ArrayList<String>>();
+        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+            final PhylogenyNode n = it.next();
+            final List<PhylogenyNode> l = n.getAllExternalDescendants();
+            final ArrayList<String> c = new ArrayList<String>();
+            phylogeny_external_names_per_node.put( new Integer( n.getId() ), c );
+            for( final PhylogenyNode phylogenyNode : l ) {
+                c.add( phylogenyNode.getName() );
+            }
+        }
+        return phylogeny_external_names_per_node;
+    }
+
+    private static void strip( final String[] to_keep, final Phylogeny to_be_stripped ) {
+        PhylogenyMethods.deleteExternalNodesPositiveSelection( to_keep, to_be_stripped );
+    }
+}
diff --git a/forester/java/src/org/forester/tools/TreeSplitMatrix.java b/forester/java/src/org/forester/tools/TreeSplitMatrix.java
new file mode 100644 (file)
index 0000000..d1e015b
--- /dev/null
@@ -0,0 +1,257 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.tools;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+public class TreeSplitMatrix {
+
+    private final SortedMap<PhylogenyNode, List<Boolean>> _data;
+    private final Map<Integer, Integer>                   _positive_counts;
+    private final boolean                                 _strict;
+
+    public TreeSplitMatrix( final Phylogeny evaluator, final boolean strict, final Phylogeny target ) {
+        Set<PhylogenyNode> target_external_nodes = null;
+        if ( !strict ) {
+            if ( ( target == null ) || target.isEmpty() ) {
+                throw new IllegalArgumentException( "target must not be null or empty if non-strict evaluation is expected" );
+            }
+            target_external_nodes = new HashSet<PhylogenyNode>();
+            for( final PhylogenyNodeIterator it = target.iteratorExternalForward(); it.hasNext(); ) {
+                final PhylogenyNode n = it.next();
+                if ( target_external_nodes.contains( n ) ) {
+                    throw new IllegalArgumentException( "node [" + n.toString() + "] of target is not unique" );
+                }
+                target_external_nodes.add( n );
+            }
+        }
+        _data = new TreeMap<PhylogenyNode, List<Boolean>>();
+        _positive_counts = new HashMap<Integer, Integer>();
+        _strict = strict;
+        decompose( evaluator, target_external_nodes );
+    }
+
+    /**
+     * If strict is true, the target nodes (all external nodes of the phylogeny
+     * for which support values are to be calculated) are not used for anything
+     * during construction.
+     * 
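+     * Example (a hedged usage sketch; 'evaluator' is a Phylogeny and
+     * 'target_ext_nodes' and 'query_nodes' are Sets of PhylogenyNode objects
+     * obtained elsewhere, the names are illustrative only):
+     * <pre>
+     * final TreeSplitMatrix matrix = new TreeSplitMatrix( evaluator, false, target_ext_nodes );
+     * final boolean split_found = matrix.match( query_nodes );
+     * </pre>
+     * 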
+     * @param evaluator
+     * @param strict
+     * @param target_external_nodes
+     */
+    public TreeSplitMatrix( final Phylogeny evaluator,
+                            final boolean strict,
+                            final Set<PhylogenyNode> target_external_nodes ) {
+        if ( !strict && ( ( target_external_nodes == null ) || target_external_nodes.isEmpty() ) ) {
+            throw new IllegalArgumentException( "target nodes list must not be null or empty if non-strict evaluation is expected" );
+        }
+        _data = new TreeMap<PhylogenyNode, List<Boolean>>();
+        _positive_counts = new HashMap<Integer, Integer>();
+        _strict = strict;
+        decompose( evaluator, target_external_nodes );
+    }
+
+    private boolean contains( final PhylogenyNode node ) {
+        return _data.keySet().contains( node );
+    }
+
+    private void decompose( final Phylogeny phy, final Set<PhylogenyNode> target_external_nodes ) {
+        setUpKeys( phy, target_external_nodes );
+        setUpValues( phy, target_external_nodes );
+        sanityCheck();
+    }
+
+    private int getNumberOfTrueValuesAt( final int index ) {
+        if ( _positive_counts.containsKey( index ) ) {
+            return _positive_counts.get( index );
+        }
+        return 0;
+    }
+
+    private boolean getValue( final PhylogenyNode node, final int index ) {
+        if ( _data.containsKey( node ) ) {
+            return _data.get( node ).get( index );
+        }
+        return false;
+    }
+
+    private char getValueAsChar( final PhylogenyNode node, final int index ) {
+        if ( getValue( node, index ) ) {
+            return '.';
+        }
+        else {
+            return ' ';
+        }
+    }
+
+    private Set<PhylogenyNode> keySet() {
+        return _data.keySet();
+    }
+
+    public boolean match( final Set<PhylogenyNode> query_nodes ) {
+        final Set<PhylogenyNode> my_query_nodes = query_nodes;
+        if ( _strict ) {
+            if ( !keySet().containsAll( my_query_nodes ) ) {
+                throw new IllegalArgumentException( "external nodes of target and evaluator do not match" );
+            }
+        }
+        //else {
+        //THIS IS WRONG
+        // my_query_nodes.retainAll( keySet() );
+        //}
+        for( int i = 0; i < size(); ++i ) {
+            if ( match( my_query_nodes, i ) ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private boolean match( final Set<PhylogenyNode> query_nodes, final int i ) {
+        final int counts = getNumberOfTrueValuesAt( i );
+        final int q_counts = query_nodes.size();
+        boolean positive_matches = true;
+        boolean negative_matches = true;
+        if ( q_counts != counts ) {
+            positive_matches = false;
+        }
+        if ( q_counts != keySet().size() - counts ) {
+            negative_matches = false;
+        }
+        if ( !positive_matches && !negative_matches ) {
+            return false;
+        }
+        for( final PhylogenyNode query_node : query_nodes ) {
+            if ( !contains( query_node ) ) {
+                if ( _strict ) {
+                    //TODO remove me after testing
+                    throw new RuntimeException( "this should not have happened, for query " + query_node + ":\n"
+                            + toString() );
+                }
+                else {
+                    return false; //TODO really?!?!?
+                }
+            }
+            if ( getValue( query_node, i ) ) {
+                negative_matches = false;
+            }
+            else {
+                positive_matches = false;
+            }
+            if ( !positive_matches && !negative_matches ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private void sanityCheck() {
+        int size = -1;
+        for( final PhylogenyNode key : keySet() ) {
+            if ( size < 0 ) {
+                size = size( key );
+            }
+            else if ( size != size( key ) ) {
+                throw new RuntimeException( "this should not have happened: failed to build split matrix" );
+            }
+        }
+    }
+
+    private void setUpKeys( final Phylogeny phy, final Set<PhylogenyNode> target_external_nodes ) {
+        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+            final PhylogenyNode n = it.next();
+            if ( _strict || target_external_nodes.contains( n ) ) {
+                if ( _data.containsKey( n ) ) {
+                    throw new IllegalArgumentException( "node '" + n.toString() + "' of evaluator is not unique" );
+                }
+                _data.put( n, new ArrayList<Boolean>() );
+            }
+        }
+    }
+
+    private void setUpValues( final Phylogeny phy, final Set<PhylogenyNode> target_external_nodes ) {
+        int index = 0;
+        for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            final List<PhylogenyNode> current_ext_descs = node.getAllExternalDescendants();
+            for( final PhylogenyNode key : keySet() ) {
+                //if ( _strict || target_external_nodes.contains( key ) ) {
+                if ( current_ext_descs.contains( key ) ) {
+                    _data.get( key ).add( index, true );
+                    if ( !_positive_counts.containsKey( index ) ) {
+                        _positive_counts.put( index, 1 );
+                    }
+                    else {
+                        _positive_counts.put( index, _positive_counts.get( index ) + 1 );
+                    }
+                }
+                else {
+                    _data.get( key ).add( index, false );
+                }
+                //}
+            }
+            index++;
+        }
+    }
+
+    private int size() {
+        for( final PhylogenyNode key : keySet() ) {
+            return size( key );
+        }
+        return 0;
+    }
+
+    private int size( final PhylogenyNode node ) {
+        return _data.get( node ).size();
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        for( final PhylogenyNode key : keySet() ) {
+            sb.append( key.getName() );
+            sb.append( ":" );
+            for( int i = 0; i < size( key ); ++i ) {
+                sb.append( " " );
+                sb.append( getValueAsChar( key, i ) );
+            }
+            sb.append( "\n" );
+        }
+        return sb.toString();
+    }
+}
diff --git a/forester/java/src/org/forester/util/AsciiHistogram.java b/forester/java/src/org/forester/util/AsciiHistogram.java
new file mode 100644 (file)
index 0000000..07d3da8
--- /dev/null
@@ -0,0 +1,127 @@
+// $Id:
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+public class AsciiHistogram {
+
+    final private DescriptiveStatistics _stats;
+    final private String                _title;
+
+    public AsciiHistogram( final DescriptiveStatistics stats ) {
+        _stats = stats;
+        _title = "";
+    }
+
+    public AsciiHistogram( final DescriptiveStatistics stats, final String title ) {
+        _stats = stats;
+        _title = title;
+    }
+
+    private void drawToStringBuffer( final double min,
+                                     final char symbol,
+                                     final int size,
+                                     final int digits,
+                                     final StringBuffer sb,
+                                     final int[] bins,
+                                     final int max_count,
+                                     final int under,
+                                     final int over,
+                                     final double binning_factor ) {
+        final double draw_factor = ( double ) max_count / size;
+        final int counts_size = ForesterUtil.roundToInt( Math.log10( max_count ) ) + 1;
+        if ( !ForesterUtil.isEmpty( getTitle() ) ) {
+            sb.append( getTitle() );
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        if ( under > 0 ) {
+            sb.append( "[" + under + "] " );
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        for( int i = 0; i < bins.length; ++i ) {
+            final int count = bins[ i ];
+            final double label = ForesterUtil.round( ( min + i * ( 1.0 / binning_factor ) ), digits );
+            sb.append( ForesterUtil.pad( label + "", digits, '0', false ) );
+            sb.append( " [" + ForesterUtil.pad( count + "", counts_size, ' ', true ) + "] " );
+            final int s = ForesterUtil.roundToInt( count / draw_factor );
+            for( int j = 0; j < s; ++j ) {
+                sb.append( symbol );
+            }
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        if ( over > 0 ) {
+            sb.append( "[" + over + "] " );
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+    }
+
+    private DescriptiveStatistics getDescriptiveStatistics() {
+        return _stats;
+    }
+
+    private String getTitle() {
+        return _title;
+    }
+
+    public StringBuffer toStringBuffer( final double min,
+                                        final double max,
+                                        final int number_of_bins,
+                                        final char symbol,
+                                        final int size,
+                                        final int digits ) {
+        if ( min >= max ) {
+            throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" );
+        }
+        if ( number_of_bins < 3 ) {
+            throw new IllegalArgumentException( "number of bins is smaller than 3" );
+        }
+        if ( size < 2 ) {
+            throw new IllegalArgumentException( "size is smaller than 2" );
+        }
+        final StringBuffer sb = new StringBuffer();
+        int max_count = 0;
+        final double binning_factor = number_of_bins / ( max - min );
+        final int[] bins = BasicDescriptiveStatistics
+                .performBinning( getDescriptiveStatistics().getDataAsDoubleArray(), min, max, number_of_bins );
+        for( final int bin : bins ) {
+            if ( bin > max_count ) {
+                max_count = bin;
+            }
+        }
+        drawToStringBuffer( min, symbol, size, digits, sb, bins, max_count, 0, 0, binning_factor );
+        return sb;
+    }
+
+    public StringBuffer toStringBuffer( final int bins, final char symbol, final int size, final int digits ) {
+        return toStringBuffer( getDescriptiveStatistics().getMin(),
+                               getDescriptiveStatistics().getMax(),
+                               bins,
+                               symbol,
+                               size,
+                               digits );
+    }
+}
diff --git a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java
new file mode 100644 (file)
index 0000000..699526f
--- /dev/null
@@ -0,0 +1,340 @@
+// $Id:
+// $
+//
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class BasicDescriptiveStatistics implements DescriptiveStatistics {
+
+    private List<Double> _data;
+    private double       _sum;
+    private double       _min;
+    private double       _max;
+    private double       _sigma;
+    private boolean      _recalc_sigma;
+
+    public BasicDescriptiveStatistics() {
+        init();
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#addValue(double)
+     */
+    public void addValue( final double d ) {
+        _recalc_sigma = true;
+        _sum += d;
+        _data.add( new Double( d ) );
+        if ( d < _min ) {
+            _min = d;
+        }
+        if ( d > _max ) {
+            _max = d;
+        }
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#arithmeticMean()
+     */
+    public double arithmeticMean() {
+        validate();
+        return getSum() / getN();
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#asSummary()
+     */
+    public String asSummary() {
+        if ( getN() > 1 ) {
+            return arithmeticMean() + DescriptiveStatistics.PLUS_MINUS + sampleStandardDeviation() + " [" + getMin()
+                    + "..." + getMax() + "]";
+        }
+        else {
+            return "" + arithmeticMean();
+        }
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#coefficientOfVariation()
+     */
+    public double coefficientOfVariation() {
+        validate();
+        return ( sampleStandardDeviation() / arithmeticMean() );
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getDataAsDoubleArray()
+     */
+    public double[] getDataAsDoubleArray() {
+        validate();
+        final double[] data_array = new double[ getN() ];
+        for( int i = 0; i < getN(); ++i ) {
+            data_array[ i ] = getValue( i );
+        }
+        return data_array;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getMax()
+     */
+    public double getMax() {
+        validate();
+        return _max;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getMin()
+     */
+    public double getMin() {
+        validate();
+        return _min;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getN()
+     */
+    public int getN() {
+        return _data.size();
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getSum()
+     */
+    public double getSum() {
+        validate();
+        return _sum;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getSummaryAsString()
+     */
+    public String getSummaryAsString() {
+        validate();
+        final double mean = arithmeticMean();
+        final double sd = sampleStandardDeviation();
+        return "" + mean + ( ( char ) 177 ) + sd + " [" + getMin() + "..." + getMax() + "]";
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#getValue(int)
+     */
+    public double getValue( final int index ) {
+        validate();
+        return ( ( ( _data.get( index ) ) ).doubleValue() );
+    }
+
+    private void init() {
+        _data = new ArrayList<Double>();
+        _sum = 0.0;
+        _min = Double.MAX_VALUE;
+        _max = -Double.MAX_VALUE;
+        _sigma = 0.0;
+        _recalc_sigma = true;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#median()
+     */
+    public double median() {
+        validate();
+        double median = 0.0;
+        if ( getN() == 1 ) {
+            median = getValue( 0 );
+        }
+        else {
+            final int index = ( getN() / 2 );
+            final double[] data_array = getDataAsDoubleArray();
+            Arrays.sort( data_array );
+            if ( ( ( data_array.length ) % 2 ) == 0 ) {
+                // even number of data values
+                median = ( data_array[ index - 1 ] + data_array[ index ] ) / 2.0;
+            }
+            else {
+                median = data_array[ index ];
+            }
+        }
+        return median;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#midrange()
+     */
+    public double midrange() {
+        validate();
+        return ( _min + _max ) / 2.0;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#pearsonianSkewness()
+     */
+    public double pearsonianSkewness() {
+        validate();
+        final double mean = arithmeticMean();
+        final double median = median();
+        final double sd = sampleStandardDeviation();
+        return ( ( 3 * ( mean - median ) ) / sd );
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#sampleStandardDeviation()
+     */
+    public double sampleStandardDeviation() {
+        return Math.sqrt( sampleVariance() );
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#sampleStandardUnit(double)
+     */
+    public double sampleStandardUnit( final double value ) {
+        validate();
+        return BasicDescriptiveStatistics.sampleStandardUnit( value, arithmeticMean(), sampleStandardDeviation() );
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#sampleVariance()
+     */
+    public double sampleVariance() {
+        validate();
+        if ( getN() < 2 ) {
+            throw new ArithmeticException( "attempt to calculate sample variance for fewer than two values" );
+        }
+        return ( sumDeviations() / ( getN() - 1 ) );
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#standardErrorOfMean()
+     */
+    public double standardErrorOfMean() {
+        validate();
+        return ( sampleStandardDeviation() / Math.sqrt( getN() ) );
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#sumDeviations()
+     */
+    public double sumDeviations() {
+        validate();
+        if ( _recalc_sigma ) {
+            _recalc_sigma = false;
+            _sigma = 0.0;
+            final double mean = arithmeticMean();
+            for( int i = 0; i < getN(); ++i ) {
+                _sigma += Math.pow( ( getValue( i ) - mean ), 2 );
+            }
+        }
+        return _sigma;
+    }
+
+    /* (non-Javadoc)
+     * @see org.forester.util.DescriptiveStatisticsI#toString()
+     */
+    @Override
+    public String toString() {
+        if ( getN() < 1 ) {
+            return "empty data set statistics";
+        }
+        final StringBuffer sb = new StringBuffer();
+        sb.append( "Descriptive statistics:" );
+        sb.append( ForesterUtil.getLineSeparator() );
+        sb.append( "n                       : " + getN() );
+        if ( getN() > 1 ) {
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "min                     : " + getMin() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "max                     : " + getMax() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "midrange                : " + midrange() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "median                  : " + median() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "mean                    : " + arithmeticMean() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "sd                      : " + sampleStandardDeviation() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "variance                : " + sampleVariance() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "standard error of mean  : " + standardErrorOfMean() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "coefficient of variation: " + coefficientOfVariation() );
+            sb.append( ForesterUtil.getLineSeparator() );
+            sb.append( "pearsonian skewness     : " + pearsonianSkewness() );
+        }
+        return sb.toString();
+    }
+
+    private void validate() throws ArithmeticException {
+        if ( getN() < 1 ) {
+            throw new ArithmeticException( "attempt to get a result from empty data set statistics" );
+        }
+    }
+
+    public static int[] performBinning( final double[] values,
+                                        final double min,
+                                        final double max,
+                                        final int number_of_bins ) {
+        if ( min >= max ) {
+            throw new IllegalArgumentException( "min [" + min + "] is larger than or equal to max [" + max + "]" );
+        }
+        if ( number_of_bins < 3 ) {
+            throw new IllegalArgumentException( "number of bins is smaller than 3" );
+        }
+        final int[] bins = new int[ number_of_bins ];
+        final double binning_factor = number_of_bins / ( max - min );
+        final int last_index = number_of_bins - 1;
+        for( final double d : values ) {
+            if ( !( ( d > max ) || ( d < min ) ) ) {
+                final int bin = ( int ) ( ( d - min ) * binning_factor );
+                if ( bin > last_index ) {
+                    ++bins[ last_index ];
+                }
+                else {
+                    ++bins[ bin ];
+                }
+            }
+        }
+        return bins;
+    }
+
+    /**
+     * Computes the sample standard unit (z-score). Used to express 'value' in
+     * terms of standard units. Note that 'value', 'mean' and 'sd' must all be
+     * from the same sample data.
+     * 
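+     * For example, a value of 12.0 in a sample with mean 10.0 and standard
+     * deviation 2.0 has a sample standard unit of ( 12.0 - 10.0 ) / 2.0 = 1.0.
+     * 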
+     * @param value
+     *            a double in the sample for which the standard unit is to be
+     *            computed
+     * @param mean
+     *            the mean of the sample
+     * @param sd
+     *            the standard deviation of the sample
+     * @return 'value' in terms of standard units
+     */
+    public static double sampleStandardUnit( final double value, final double mean, final double sd ) {
+        return ( value - mean ) / sd;
+    }
+}
diff --git a/forester/java/src/org/forester/util/BasicTable.java b/forester/java/src/org/forester/util/BasicTable.java
new file mode 100644 (file)
index 0000000..2469129
--- /dev/null
@@ -0,0 +1,188 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.util;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public class BasicTable<E> {
+
+    private Map<String, Map<String, E>> _rows;
+    private int                         _max_row;
+    private int                         _max_col;
+
+    public BasicTable() {
+        init();
+    }
+
+    public Map<String, E> getColumnsAsMap( final int key_col, final int value_col ) throws IllegalArgumentException {
+        final Map<String, E> map = new HashMap<String, E>();
+        for( int row = 0; row < getNumberOfRows(); ++row ) {
+            final String key = ( String ) getValue( key_col, row );
+            final E value = getValue( value_col, row );
+            if ( ( key != null ) && ( value != null ) ) {
+                if ( map.containsKey( key ) ) {
+                    throw new IllegalArgumentException( "attempt to use non-unique table value as key [" + key + "]" );
+                }
+                map.put( key, value );
+            }
+        }
+        return map;
+    }
+
+    public Map<String, Double> getColumnsAsMapDouble( final int key_col, final int value_col )
+            throws IllegalArgumentException, IOException {
+        final Map<String, Double> map = new HashMap<String, Double>();
+        for( int row = 0; row < getNumberOfRows(); ++row ) {
+            final String key = ( String ) getValue( key_col, row );
+            double value = 0;
+            try {
+                value = Double.parseDouble( getValueAsString( value_col, row ) );
+            }
+            catch ( final NumberFormatException e ) {
+                throw new IOException( e );
+            }
+            if ( key != null ) {
+                if ( map.containsKey( key ) ) {
+                    throw new IllegalArgumentException( "attempt to use non-unique table value as key [" + key + "]" );
+                }
+                map.put( key, value );
+            }
+        }
+        return map;
+    }
+
+    // Returns -1 if not found, IllegalArgumentException if not unique.
+    public int findRow( final String first_col_value ) throws IllegalArgumentException {
+        int result = -1;
+        for( int i = 0; i < this.getNumberOfRows(); ++i ) {
+            if ( getValueAsString( 0, i ).equals( first_col_value ) ) {
+                if ( result >= 0 ) {
+                    throw new IllegalArgumentException( "\"" + first_col_value + "\" is not unique" );
+                }
+                result = i;
+            }
+        }
+        return result;
+    }
+
+    public int getNumberOfColumns() {
+        return _max_col + 1;
+    }
+
+    public int getNumberOfRows() {
+        return _max_row + 1;
+    }
+
+    private Map<String, E> getRow( final int row ) {
+        return getRows().get( "" + row );
+    }
+
+    private Map<String, Map<String, E>> getRows() {
+        return _rows;
+    }
+
+    public E getValue( final int col, final int row ) throws IllegalArgumentException {
+        if ( ( row > getNumberOfRows() - 1 ) || ( row < 0 ) ) {
+            throw new IllegalArgumentException( "value for row (" + row + ") is out of range [number of rows: "
+                    + getNumberOfRows() + "]" );
+        }
+        else if ( ( col >= getNumberOfColumns() ) || ( col < 0 ) ) {
+            throw new IllegalArgumentException( "value for column (" + col + ") is out of range [number of columns: "
+                    + getNumberOfColumns() + "]" );
+        }
+        final Map<String, E> row_map = getRow( row );
+        if ( ( row_map == null ) || ( row_map.size() < 1 ) ) {
+            return null;
+        }
+        return row_map.get( "" + col );
+    }
+
+    public String getValueAsString( final int col, final int row ) throws IllegalArgumentException {
+        if ( getValue( col, row ) != null ) {
+            return getValue( col, row ).toString();
+        }
+        return null;
+    }
+
+    private void init() {
+        _rows = new HashMap<String, Map<String, E>>();
+        setMaxCol( -1 );
+        setMaxRow( -1 );
+    }
+
+    public boolean isEmpty() {
+        return getNumberOfRows() <= 0;
+    }
+
+    private void setMaxCol( final int max_col ) {
+        _max_col = max_col;
+    }
+
+    private void setMaxRow( final int max_row ) {
+        _max_row = max_row;
+    }
+
+    public void setValue( final int col, final int row, final E value ) {
+        if ( ( row < 0 ) || ( col < 0 ) ) {
+            throw new IllegalArgumentException( "attempt to use negative values for row or column" );
+        }
+        if ( row > getNumberOfRows() - 1 ) {
+            setMaxRow( row );
+        }
+        if ( col > getNumberOfColumns() - 1 ) {
+            setMaxCol( col );
+        }
+        final String row_key = "" + row;
+        Map<String, E> row_map = null;
+        if ( getRows().containsKey( row_key ) ) {
+            row_map = getRows().get( row_key );
+        }
+        else {
+            row_map = new HashMap<String, E>();
+            getRows().put( row_key, row_map );
+        }
+        row_map.put( "" + col, value );
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer sb = new StringBuffer();
+        for( int row = 0; row < getNumberOfRows(); ++row ) {
+            for( int col = 0; col < getNumberOfColumns(); ++col ) {
+                sb.append( getValue( col, row ) );
+                if ( col < getNumberOfColumns() - 1 ) {
+                    sb.append( " " );
+                }
+            }
+            if ( row < getNumberOfRows() - 1 ) {
+                sb.append( ForesterUtil.LINE_SEPARATOR );
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/forester/java/src/org/forester/util/BasicTableParser.java b/forester/java/src/org/forester/util/BasicTableParser.java
new file mode 100644 (file)
index 0000000..9ebae2b
--- /dev/null
@@ -0,0 +1,108 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+public class BasicTableParser {
+
+    private final static String START_OF_COMMENT_LINE_DEFAULT = "#";
+
+    private BasicTableParser() {
+    }
+
+    public static BasicTable<String> parse( final Object source, final String column_delimiter ) throws IOException {
+        return BasicTableParser.parse( source, column_delimiter, false, START_OF_COMMENT_LINE_DEFAULT, false ).get( 0 );
+    }
+
+    public static BasicTable<String> parse( final Object source,
+                                            final String column_delimiter,
+                                            final boolean use_first_separator_only ) throws IOException {
+        return BasicTableParser.parse( source,
+                                       column_delimiter,
+                                       use_first_separator_only,
+                                       START_OF_COMMENT_LINE_DEFAULT,
+                                       false ).get( 0 );
+    }
+
+    public static List<BasicTable<String>> parse( final Object source,
+                                                  final String column_delimiter,
+                                                  final boolean use_first_separator_only,
+                                                  final String start_of_comment_line,
+                                                  final boolean tables_separated_by_single_string_line )
+            throws IOException {
+        final BufferedReader reader = ForesterUtil.obtainReader( source );
+        final List<BasicTable<String>> tables = new ArrayList<BasicTable<String>>();
+        BasicTable<String> table = new BasicTable<String>();
+        int row = 0;
+        String line;
+        boolean saw_first_table = false;
+        final boolean use_start_of_comment_line = !( ForesterUtil.isEmpty( start_of_comment_line ) );
+        while ( ( line = reader.readLine() ) != null ) {
+            line = line.trim();
+            if ( saw_first_table
+                    && ( ForesterUtil.isEmpty( line ) || ( tables_separated_by_single_string_line && ( line
+                            .indexOf( column_delimiter ) < 0 ) ) ) ) {
+                if ( !table.isEmpty() ) {
+                    tables.add( table );
+                }
+                table = new BasicTable<String>();
+                row = 0;
+            }
+            else if ( !ForesterUtil.isEmpty( line )
+                    && ( !use_start_of_comment_line || !line.startsWith( start_of_comment_line ) ) ) {
+                saw_first_table = true;
+                final StringTokenizer st = new StringTokenizer( line, column_delimiter );
+                int col = 0;
+                if ( st.hasMoreTokens() ) {
+                    table.setValue( col++, row, st.nextToken().trim() );
+                }
+                if ( !use_first_separator_only ) {
+                    while ( st.hasMoreTokens() ) {
+                        table.setValue( col++, row, st.nextToken().trim() );
+                    }
+                }
+                else {
+                    final StringBuffer rest = new StringBuffer();
+                    while ( st.hasMoreTokens() ) {
+                        rest.append( st.nextToken() );
+                    }
+                    table.setValue( col++, row, rest.toString().trim() );
+                }
+                ++row;
+            }
+        }
+        if ( !table.isEmpty() ) {
+            tables.add( table );
+        }
+        reader.close();
+        return tables;
+    }
+}
diff --git a/forester/java/src/org/forester/util/CommandLineArguments.java b/forester/java/src/org/forester/util/CommandLineArguments.java
new file mode 100644 (file)
index 0000000..0fc0485
--- /dev/null
@@ -0,0 +1,281 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+public final class CommandLineArguments {
+
+    private final static String OPTIONS_PREFIX          = "-";
+    private final static String EXTENDED_OPTIONS_PREFIX = "--";
+    private final static String OPTIONS_SEPARATOR       = "=";
+    private Map<String, String> _options;
+    private Map<String, String> _extended_options;
+    private List<String>        _names;
+    private String              _command_line_str;
+
+    public CommandLineArguments( final String[] args ) throws IOException {
+        init();
+        parseCommandLineArguments( args );
+    }
+
+    private Map<String, String> getAllOptions() {
+        final Map<String, String> o = new HashMap<String, String>();
+        o.putAll( getOptionsList() );
+        o.putAll( getExtendedOptionsList() );
+        return o;
+    }
+
+    public String getCommandLineArgsAsString() {
+        return _command_line_str;
+    }
+
+    private Map<String, String> getExtendedOptionsList() {
+        return _extended_options;
+    }
+
+    public File getFile( final int i ) {
+        return new File( getNames()[ i ] );
+    }
+
+    public String getName( final int i ) {
+        return getNames()[ i ];
+    }
+
+    public String[] getNames() {
+        final String[] a = new String[ getNamesList().size() ];
+        return getNamesList().toArray( a );
+    }
+
+    private List<String> getNamesList() {
+        return _names;
+    }
+
+    public int getNumberOfNames() {
+        return getNames().length;
+    }
+
+    private Map<String, String> getOptionsList() {
+        return _options;
+    }
+
+    public String getOptionValue( final String option_name ) throws IllegalArgumentException {
+        final Map<String, String> o = getAllOptions();
+        if ( o.containsKey( option_name ) ) {
+            final String value = o.get( option_name );
+            if ( !ForesterUtil.isEmpty( value ) ) {
+                return value;
+            }
+            else {
+                throw new IllegalArgumentException( "value for \"" + option_name + "\" is not set" );
+            }
+        }
+        else {
+            throw new IllegalArgumentException( "option \"" + option_name + "\" is not set" );
+        }
+    }
+
+    /**
+     * Returns the option value with all double and single quote characters removed.
+     * 
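+     * For example, a raw option value of "abc" or 'abc' is returned as abc.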
+     */
+    public String getOptionValueAsCleanString( final String option_name ) throws IllegalArgumentException {
+        return getOptionValue( option_name ).replaceAll( "\"", "" ).replaceAll( "\'", "" );
+    }
+
+    public double getOptionValueAsDouble( final String option_name ) throws IOException {
+        double d = -Double.MAX_VALUE;
+        try {
+            d = new Double( getOptionValue( option_name ) ).doubleValue();
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "value for option \"" + option_name + "\" is expected to be of type double" );
+        }
+        return d;
+    }
+
+    public int getOptionValueAsInt( final String option_name ) throws IOException {
+        int i = Integer.MIN_VALUE;
+        try {
+            i = new Integer( getOptionValue( option_name ) ).intValue();
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "value for option \"" + option_name + "\" is expected to be of type integer" );
+        }
+        return i;
+    }
+
+    public long getOptionValueAsLong( final String option_name ) throws IOException {
+        long l = Long.MIN_VALUE;
+        try {
+            l = new Long( getOptionValue( option_name ) ).longValue();
+        }
+        catch ( final NumberFormatException e ) {
+            throw new IOException( "value for option \"" + option_name + "\" is expected to be of type long" );
+        }
+        return l;
+    }
+
+    private void init() {
+        _options = new HashMap<String, String>();
+        _extended_options = new HashMap<String, String>();
+        _names = new ArrayList<String>();
+        _command_line_str = "";
+    }
+
+    public boolean isOptionHasAValue( final String option_name ) {
+        final Map<String, String> o = getAllOptions();
+        if ( o.containsKey( option_name ) ) {
+            final String value = o.get( option_name );
+            return ( !ForesterUtil.isEmpty( value ) );
+        }
+        else {
+            throw new IllegalArgumentException( "option \"" + option_name + "\" is not set" );
+        }
+    }
+
+    public boolean isOptionSet( final String option_name ) {
+        final Map<String, String> o = getAllOptions();
+        return ( o.containsKey( option_name ) );
+    }
+
+    public boolean isOptionValueSet( final String option_name ) throws IllegalArgumentException {
+        final Map<String, String> o = getAllOptions();
+        if ( o.containsKey( option_name ) ) {
+            return !( ForesterUtil.isEmpty( o.get( option_name ) ) );
+        }
+        else {
+            throw new IllegalArgumentException( "option \"" + option_name + "\" is not set" );
+        }
+    }
+
+    private void parseCommandLineArguments( final String[] args ) throws IOException {
+        for( int i = 0; i < args.length; ++i ) {
+            final String arg = args[ i ].trim();
+            _command_line_str += arg;
+            if ( i < args.length - 1 ) {
+                _command_line_str += " ";
+            }
+            if ( arg.startsWith( CommandLineArguments.EXTENDED_OPTIONS_PREFIX ) ) {
+                parseOption( arg.substring( CommandLineArguments.EXTENDED_OPTIONS_PREFIX.length() ),
+                             getExtendedOptionsList() );
+            }
+            else if ( arg.startsWith( CommandLineArguments.OPTIONS_PREFIX ) ) {
+                parseOption( arg.substring( CommandLineArguments.OPTIONS_PREFIX.length() ), getOptionsList() );
+            }
+            else {
+                getNamesList().add( arg );
+            }
+        }
+    }
+
+    private void parseOption( final String option, final Map<String, String> options_map ) throws IOException {
+        final int sep_index = option.indexOf( CommandLineArguments.OPTIONS_SEPARATOR );
+        if ( sep_index < 1 ) {
+            if ( ForesterUtil.isEmpty( option ) ) {
+                throw new IOException( "attempt to set option with an empty name" );
+            }
+            if ( getAllOptions().containsKey( option ) ) {
+                throw new IOException( "attempt to set option \"" + option + "\" multiple times" );
+            }
+            options_map.put( option, null );
+        }
+        else {
+            final String key = option.substring( 0, sep_index );
+            final String value = option.substring( sep_index + 1 );
+            if ( ForesterUtil.isEmpty( key ) ) {
+                throw new IllegalArgumentException( "attempt to set option with an empty name" );
+            }
+            //  if ( ForesterUtil.isEmpty( value ) ) {
+            //      throw new IllegalArgumentException( "attempt to set option with an empty value" );
+            //  }
+            if ( getAllOptions().containsKey( key ) ) {
+                throw new IllegalArgumentException( "attempt to set option \"" + key + "\" multiple times [" + option
+                        + "]" );
+            }
+            options_map.put( key, value );
+        }
+    }
+
+    public List<String> validateAllowedOptions( final List<String> allowed_options ) {
+        final Map<String, String> options = getAllOptions();
+        final List<String> disallowed = new ArrayList<String>();
+        for( final String o : options.keySet() ) {
+            if ( !allowed_options.contains( o ) ) {
+                disallowed.add( o );
+            }
+        }
+        return disallowed;
+    }
+
+    public String validateAllowedOptionsAsString( final List<String> allowed_options ) {
+        final List<String> disallowed = validateAllowedOptions( allowed_options );
+        String disallowed_string = "";
+        for( final Iterator<String> iter = disallowed.iterator(); iter.hasNext(); ) {
+            disallowed_string += "\"" + iter.next();
+            if ( iter.hasNext() ) {
+                disallowed_string += "\", ";
+            }
+            else {
+                disallowed_string += "\"";
+            }
+        }
+        return disallowed_string;
+    }
+
+    public List<String> validateMandatoryOptions( final List<String> mandatory_options ) {
+        final Map<String, String> options = getAllOptions();
+        final List<String> missing = new ArrayList<String>();
+        for( final String ma : mandatory_options ) {
+            if ( !options.containsKey( ma ) ) {
+                missing.add( ma );
+            }
+        }
+        return missing;
+    }
+
+    public String validateMandatoryOptionsAsString( final List<String> mandatory_options ) {
+        final List<String> missing = validateMandatoryOptions( mandatory_options );
+        String missing_string = "";
+        for( final Iterator<String> iter = missing.iterator(); iter.hasNext(); ) {
+            missing_string += "\"" + iter.next();
+            if ( iter.hasNext() ) {
+                missing_string += "\", ";
+            }
+            else {
+                missing_string += "\"";
+            }
+        }
+        return missing_string;
+    }
+}
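
A minimal usage sketch for the class above (illustrative only: it assumes the CommandLineArguments constructor defined earlier in this file accepts the raw String[] passed to main, and the option names "i", "o", and "verbose" are hypothetical):

import java.util.Arrays;
import java.util.List;

import org.forester.util.CommandLineArguments;

public class CommandLineArgumentsSketch {

    public static void main( final String[] args ) throws Exception {
        // args are expected in the format defined by the OPTIONS_PREFIX / OPTIONS_SEPARATOR
        // constants declared earlier in CommandLineArguments.java
        final CommandLineArguments cla = new CommandLineArguments( args );
        final List<String> allowed = Arrays.asList( "i", "o", "verbose" );
        final List<String> mandatory = Arrays.asList( "i" );
        if ( !cla.validateAllowedOptions( allowed ).isEmpty() ) {
            System.err.println( "unknown option(s): " + cla.validateAllowedOptionsAsString( allowed ) );
        }
        if ( !cla.validateMandatoryOptions( mandatory ).isEmpty() ) {
            System.err.println( "missing option(s): " + cla.validateMandatoryOptionsAsString( mandatory ) );
        }
        if ( cla.isOptionSet( "i" ) && cla.isOptionValueSet( "i" ) ) {
            System.out.println( "input option carries a value" );
        }
        if ( cla.isOptionSet( "verbose" ) ) {
            System.out.println( "verbose mode enabled" );
        }
    }
}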
diff --git a/forester/java/src/org/forester/util/CommandProcessBuilder.java b/forester/java/src/org/forester/util/CommandProcessBuilder.java
new file mode 100644 (file)
index 0000000..054f12c
--- /dev/null
@@ -0,0 +1,81 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+public class CommandProcessBuilder {
+
+    public static Process execute( final List<String> command, final File working_dir ) throws InterruptedException,
+            IOException {
+        final ProcessBuilder builder = new ProcessBuilder( command );
+        if ( working_dir != null ) {
+            if ( !working_dir.exists() ) {
+                throw new IllegalArgumentException( "directory [" + working_dir.getAbsolutePath() + "] does not exist" );
+            }
+            if ( !working_dir.isDirectory() ) {
+                throw new IllegalArgumentException( "[" + working_dir.getAbsolutePath() + "] is not a directory" );
+            }
+            if ( !working_dir.canWrite() ) {
+                throw new IllegalArgumentException( "cannot write to [" + working_dir.getAbsolutePath() + "]" );
+            }
+            builder.directory( working_dir );
+        }
+        final Process process = builder.start();
+        return process;
+    }
+
+    public static void main( final String args[] ) {
+        final List<String> command = new ArrayList<String>();
+        command.add( System.getenv( "windir" ) + "\\system32\\" + "tree.com" );
+        command.add( "/A" );
+        Process p;
+        System.out.println( "Directory : " + System.getenv( "temp" ) );
+        try {
+            p = CommandProcessBuilder.execute( command, new File( System.getenv( "temp" ) ) );
+            final InputStream is = p.getInputStream();
+            final InputStreamReader isr = new InputStreamReader( is );
+            final BufferedReader br = new BufferedReader( isr );
+            String line;
+            while ( ( line = br.readLine() ) != null ) {
+                System.out.println( line );
+            }
+            System.out.println( "OK." );
+        }
+        catch ( final InterruptedException e ) {
+            e.printStackTrace();
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+    }
+}
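
A sketch of using the class above with a POSIX command (illustrative only; the command and working directory are assumptions). For commands that write heavily to standard error, the error stream should be drained as well, ideally concurrently, so the child process cannot block on a full pipe.

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.forester.util.CommandProcessBuilder;

public class CommandProcessBuilderSketch {

    public static void main( final String[] args ) throws Exception {
        // runs "ls -l" in /tmp and prints its standard output
        final Process p = CommandProcessBuilder.execute( Arrays.asList( "ls", "-l" ), new File( "/tmp" ) );
        final BufferedReader out = new BufferedReader( new InputStreamReader( p.getInputStream() ) );
        String line;
        while ( ( line = out.readLine() ) != null ) {
            System.out.println( line );
        }
        System.out.println( "exit value: " + p.waitFor() );
    }
}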
diff --git a/forester/java/src/org/forester/util/DescriptiveStatistics.java b/forester/java/src/org/forester/util/DescriptiveStatistics.java
new file mode 100644 (file)
index 0000000..83b2f4f
--- /dev/null
@@ -0,0 +1,83 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+public interface DescriptiveStatistics {
+
+    public final static String PLUS_MINUS = "" + ( char ) 177;
+
+    public abstract void addValue( final double d );
+
+    public abstract double arithmeticMean();
+
+    public abstract String asSummary();
+
+    /**
+     * Computes the coefficient of variation, which expresses the standard
+     * deviation independent of the unit of measure.
+     *
+     * @return the coefficient of variation
+     */
+    public abstract double coefficientOfVariation();
+
+    public abstract double[] getDataAsDoubleArray();
+
+    public abstract double getMax();
+
+    public abstract double getMin();
+
+    public abstract int getN();
+
+    public abstract double getSum();
+
+    public abstract String getSummaryAsString();
+
+    public abstract double getValue( final int index );
+
+    public abstract double median();
+
+    public abstract double midrange();
+
+    /**
+     * Determines the relationship between the mean and the median, which
+     * reflects how the data deviate from a normal (bell shaped) distribution.
+     *
+     * @return the Pearsonian skewness
+     */
+    public abstract double pearsonianSkewness();
+
+    public abstract double sampleStandardDeviation();
+
+    public abstract double sampleStandardUnit( final double value );
+
+    public abstract double sampleVariance();
+
+    public abstract double standardErrorOfMean();
+
+    public abstract double sumDeviations();
+
+    public abstract String toString();
+}
\ No newline at end of file
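
For reference, a minimal sketch of the conventional formulas behind coefficientOfVariation() and pearsonianSkewness() as described in the javadoc above (illustrative only; this is not the library's implementation): the coefficient of variation is the sample standard deviation divided by the arithmetic mean, and Pearson's second skewness coefficient is 3 * (mean - median) / standard deviation.

public final class DescriptiveStatisticsFormulasSketch {

    // coefficient of variation: standard deviation expressed relative to the mean
    static double coefficientOfVariation( final double sample_sd, final double mean ) {
        return sample_sd / mean;
    }

    // Pearson's second skewness coefficient: distance of the mean from the median,
    // in units of the standard deviation
    static double pearsonianSkewness( final double mean, final double median, final double sample_sd ) {
        return ( 3 * ( mean - median ) ) / sample_sd;
    }

    public static void main( final String[] args ) {
        System.out.println( coefficientOfVariation( 2.0, 10.0 ) );  // 0.2
        System.out.println( pearsonianSkewness( 10.0, 9.0, 2.0 ) ); // 1.5
    }
}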
diff --git a/forester/java/src/org/forester/util/ExternalProgram.java b/forester/java/src/org/forester/util/ExternalProgram.java
new file mode 100644 (file)
index 0000000..54aafe8
--- /dev/null
@@ -0,0 +1,124 @@
+// $Id:
+// forester -- software libraries and applications
+// for genomics and evolutionary biology research.
+//
+// Copyright (C) 2010 Christian M Zmasek
+// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+public class ExternalProgram {
+
+    public static boolean isExecuteableFile( final File path_to_cmd_f ) {
+        if ( !path_to_cmd_f.exists() ) {
+            return false;
+        }
+        else if ( path_to_cmd_f.isDirectory() ) {
+            return false;
+        }
+        else if ( !path_to_cmd_f.canExecute() ) {
+            return false;
+        }
+        return true;
+    }
+    private Process      _process;
+    private final String _path_to_cmd;
+
+    public ExternalProgram( final String path_to_cmd ) {
+        final File path_to_cmd_f = new File( path_to_cmd );
+        checkCmdFile( path_to_cmd_f );
+        _path_to_cmd = path_to_cmd_f.getAbsolutePath();
+    }
+
+    private void checkCmdFile( final File path_to_cmd_f ) {
+        if ( !path_to_cmd_f.exists() ) {
+            throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] does not exist" );
+        }
+        else if ( path_to_cmd_f.isDirectory() ) {
+            throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is a directory" );
+        }
+        else if ( !path_to_cmd_f.canExecute() ) {
+            throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is not executeable" );
+        }
+    }
+
+    public InputStream getErrorStream() {
+        return getProcess().getErrorStream();
+    }
+
+    public InputStream getInputStream() {
+        return getProcess().getInputStream();
+    }
+
+    public OutputStream getOutputStream() {
+        return getProcess().getOutputStream();
+    }
+
+    private String getPathToCmd() {
+        return _path_to_cmd;
+    }
+
+    private Process getProcess() {
+        return _process;
+    }
+
+    public Process launch( final String[] opts ) throws IOException, InterruptedException {
+        String[] cmd;
+        if ( ( opts == null ) || ( opts.length < 1 ) ) {
+            cmd = new String[ 1 ];
+        }
+        else {
+            cmd = new String[ opts.length + 1 ];
+            System.arraycopy( opts, 0, cmd, 1, opts.length );
+        }
+        cmd[ 0 ] = getPathToCmd();
+        System.out.println();
+        for( final String element : cmd ) {
+            System.out.print( element + " " );
+        }
+        System.out.println();
+        setProcess( Runtime.getRuntime().exec( cmd ) );
+        return getProcess();
+    }
+
+    private void setProcess( final Process process ) {
+        _process = process;
+    }
+
+    public int waitFor() {
+        try {
+            return getProcess().waitFor();
+        }
+        catch ( final InterruptedException e ) {
+            // interrupted while waiting for the process: destroy it and signal failure
+            getProcess().destroy();
+            e.printStackTrace();
+            return -1;
+        }
+    }
+}
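
A minimal usage sketch for the class above (illustrative only; the path to the executable and its option are placeholders, not part of forester):

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.forester.util.ExternalProgram;

public class ExternalProgramSketch {

    public static void main( final String[] args ) throws Exception {
        // the path below is a placeholder; the constructor verifies it exists and is executable
        final ExternalProgram prg = new ExternalProgram( "/usr/local/bin/hmmscan" );
        prg.launch( new String[] { "-h" } );
        final BufferedReader out = new BufferedReader( new InputStreamReader( prg.getInputStream() ) );
        String line;
        while ( ( line = out.readLine() ) != null ) {
            System.out.println( line );
        }
        System.out.println( "exit value: " + prg.waitFor() );
    }
}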
diff --git a/forester/java/src/org/forester/util/FailedConditionCheckException.java b/forester/java/src/org/forester/util/FailedConditionCheckException.java
new file mode 100644 (file)
index 0000000..d297377
--- /dev/null
@@ -0,0 +1,43 @@
+// $Id:
+// Exp $
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+public class FailedConditionCheckException extends RuntimeException {
+
+    private static final long serialVersionUID = -860013990231493438L;
+
+    public FailedConditionCheckException() {
+        super();
+    }
+
+    public FailedConditionCheckException( final String message ) {
+        super( message );
+    }
+}
diff --git a/forester/java/src/org/forester/util/ForesterConstants.java b/forester/java/src/org/forester/util/ForesterConstants.java
new file mode 100644 (file)
index 0000000..3edec56
--- /dev/null
@@ -0,0 +1,39 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2000-2009 Christian M. Zmasek
+// Copyright (C) 2007-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+public final class ForesterConstants {
+
+    public final static String  PHYLO_XML_VERSION           = "1.10";
+    public final static String  PHYLO_XML_LOCATION          = "http://www.phyloxml.org";
+    public final static String  PHYLO_XML_XSD               = "phyloxml.xsd";
+    public final static String  XML_SCHEMA_INSTANCE         = "http://www.w3.org/2001/XMLSchema-instance";
+    public final static String  LOCAL_PHYLOXML_XSD_RESOURCE = "resources/phyloxml.xsd";
+    public final static String  PHYLO_XML_SUFFIX            = ".xml";
+    public final static String  UTF8                        = "UTF-8";
+    public final static String  PHYLO_XML_REFERENCE         = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
+    public final static boolean RELEASE                     = false;
+}
diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java
new file mode 100644 (file)
index 0000000..ada203b
--- /dev/null
@@ -0,0 +1,1245 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.awt.Color;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.StringReader;
+import java.math.BigDecimal;
+import java.net.URL;
+import java.text.DateFormat;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.NumberFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
+import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.parsers.tol.TolParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.data.Distribution;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+
+public final class ForesterUtil {
+
+    public final static String       FILE_SEPARATOR                   = System.getProperty( "file.separator" );
+    public final static String       LINE_SEPARATOR                   = System.getProperty( "line.separator" );
+    public final static String       JAVA_VENDOR                      = System.getProperty( "java.vendor" );
+    public final static String       JAVA_VERSION                     = System.getProperty( "java.version" );
+    public final static String       OS_ARCH                          = System.getProperty( "os.arch" );
+    public final static String       OS_NAME                          = System.getProperty( "os.name" );
+    public final static String       OS_VERSION                       = System.getProperty( "os.version" );
+    public final static Pattern      PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s]" );
+    public final static double       ZERO_DIFF                        = 1.0E-9;
+    public static final BigDecimal   NULL_BD                          = new BigDecimal( 0 );
+    public static final NumberFormat FORMATTER_9;
+    public static final NumberFormat FORMATTER_6;
+    public static final NumberFormat FORMATTER_06;
+    public static final NumberFormat FORMATTER_3;
+    static {
+        final DecimalFormatSymbols dfs = new DecimalFormatSymbols();
+        dfs.setDecimalSeparator( '.' );
+        // dfs.setGroupingSeparator( ( char ) 0 );
+        FORMATTER_9 = new DecimalFormat( "#.#########", dfs );
+        FORMATTER_6 = new DecimalFormat( "#.######", dfs );
+        FORMATTER_06 = new DecimalFormat( "0.######", dfs );
+        FORMATTER_3 = new DecimalFormat( "#.###", dfs );
+    }
+
+    private ForesterUtil() {
+    }
+
+    final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) {
+        if ( sb.length() > 0 ) {
+            sb.append( separator );
+        }
+    }
+
+    final public static boolean isEmpty( final List<?> l ) {
+        if ( ( l == null ) || l.isEmpty() ) {
+            return true;
+        }
+        for( final Object o : l ) {
+            if ( o != null ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    final public static boolean isEmpty( final Set<?> s ) {
+        if ( ( s == null ) || s.isEmpty() ) {
+            return true;
+        }
+        for( final Object o : s ) {
+            if ( o != null ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * This calculates a color. If value is equal to min the returned color is
+     * minColor, if value is equal to max the returned color is maxColor,
+     * otherwise a color 'proportional' to value is returned.
+     * 
+     * @param value
+     *            the value 
+     * @param min
+     *            the smallest value 
+     * @param max
+     *            the largest value 
+     * @param minColor
+     *            the color for min
+     * @param maxColor
+     *            the color for max
+     * @return a Color
+     */
+    final public static Color calcColor( double value,
+                                         final double min,
+                                         final double max,
+                                         final Color minColor,
+                                         final Color maxColor ) {
+        if ( value < min ) {
+            value = min;
+        }
+        if ( value > max ) {
+            value = max;
+        }
+        final double x = ForesterUtil.calculateColorFactor( value, max, min );
+        final int red = ForesterUtil.calculateColorComponent( minColor.getRed(), maxColor.getRed(), x );
+        final int green = ForesterUtil.calculateColorComponent( minColor.getGreen(), maxColor.getGreen(), x );
+        final int blue = ForesterUtil.calculateColorComponent( minColor.getBlue(), maxColor.getBlue(), x );
+        return new Color( red, green, blue );
+    }
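+
+    // Usage sketch (illustrative): for a value of 5.0 on a 0.0 to 10.0 scale,
+    // calcColor( 5.0, 0.0, 10.0, Color.BLUE, Color.RED ) returns a color roughly
+    // halfway between blue and red (approximately new Color( 127, 0, 127 )).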
+
+    /**
+     * This calculates a color. If value is equal to min the returned color is
+     * minColor, if value is equal to max the returned color is maxColor, if
+     * value is equal to mean the returned color is meanColor, otherwise a color
+     * 'proportional' to value is returned -- either between min-mean or
+     * mean-max
+     * 
+     * @param value
+     *            the value
+     * @param min
+     *            the smallest value
+     * @param max
+     *            the largest value 
+     * @param mean
+     *            the mean/median value 
+     * @param minColor
+     *            the color for min
+     * @param maxColor
+     *            the color for max
+     * @param meanColor
+     *            the color for mean
+     * @return a Color
+     */
+    final public static Color calcColor( double value,
+                                         final double min,
+                                         final double max,
+                                         final double mean,
+                                         final Color minColor,
+                                         final Color maxColor,
+                                         final Color meanColor ) {
+        if ( value < min ) {
+            value = min;
+        }
+        if ( value > max ) {
+            value = max;
+        }
+        if ( value < mean ) {
+            final double x = ForesterUtil.calculateColorFactor( value, mean, min );
+            final int red = ForesterUtil.calculateColorComponent( minColor.getRed(), meanColor.getRed(), x );
+            final int green = ForesterUtil.calculateColorComponent( minColor.getGreen(), meanColor.getGreen(), x );
+            final int blue = ForesterUtil.calculateColorComponent( minColor.getBlue(), meanColor.getBlue(), x );
+            return new Color( red, green, blue );
+        }
+        else if ( value > mean ) {
+            final double x = ForesterUtil.calculateColorFactor( value, max, mean );
+            final int red = ForesterUtil.calculateColorComponent( meanColor.getRed(), maxColor.getRed(), x );
+            final int green = ForesterUtil.calculateColorComponent( meanColor.getGreen(), maxColor.getGreen(), x );
+            final int blue = ForesterUtil.calculateColorComponent( meanColor.getBlue(), maxColor.getBlue(), x );
+            return new Color( red, green, blue );
+        }
+        else {
+            return meanColor;
+        }
+    }
+
+    /**
+     * Helper method for calcColor methods.
+     * 
+     * @param smallercolor_component_x
+     *            color component the smaller color
+     * @param largercolor_component_x
+     *            color component the larger color
+     * @param x
+     *            factor
+     * @return an int representing a color component
+     */
+    final private static int calculateColorComponent( final double smallercolor_component_x,
+                                                      final double largercolor_component_x,
+                                                      final double x ) {
+        return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) );
+    }
+
+    /**
+     * Helper method for calcColor methods.
+     * 
+     * 
+     * @param value
+     *            the value
+     * @param larger
+     *            the largest value
+     * @param smaller
+     *            the smallest value
+     * @return a normalized value between larger and smaller
+     */
+    final private static double calculateColorFactor( final double value, final double larger, final double smaller ) {
+        return ( 255.0 * ( value - smaller ) ) / ( larger - smaller );
+    }
+
+    final public static String collapseWhiteSpace( final String s ) {
+        return s.replaceAll( "[\\s]+", " " );
+    }
+
+    final public static String colorToHex( final Color color ) {
+        final String rgb = Integer.toHexString( color.getRGB() );
+        return rgb.substring( 2, rgb.length() );
+    }
+
+    synchronized public static void copyFile( final File in, final File out ) throws IOException {
+        final FileInputStream in_s = new FileInputStream( in );
+        final FileOutputStream out_s = new FileOutputStream( out );
+        try {
+            final byte[] buf = new byte[ 1024 ];
+            int i = 0;
+            while ( ( i = in_s.read( buf ) ) != -1 ) {
+                out_s.write( buf, 0, i );
+            }
+        }
+        finally {
+            in_s.close();
+            out_s.close();
+        }
+    }
+
+    final public static int countChars( final String str, final char c ) {
+        int count = 0;
+        for( int i = 0; i < str.length(); ++i ) {
+            if ( str.charAt( i ) == c ) {
+                ++count;
+            }
+        }
+        return count;
+    }
+
+    final public static BufferedWriter createBufferedWriter( final File file ) throws IOException {
+        if ( file.exists() ) {
+            throw new IOException( "[" + file + "] already exists" );
+        }
+        return new BufferedWriter( new FileWriter( file ) );
+    }
+
+    final public static BufferedWriter createBufferedWriter( final String name ) throws IOException {
+        return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) );
+    }
+
+    final public static File createFileForWriting( final String name ) throws IOException {
+        final File file = new File( name );
+        if ( file.exists() ) {
+            throw new IOException( "[" + name + "] already exists" );
+        }
+        return file;
+    }
+
+    final public static PhylogenyParser createParserDependingFileContents( final File file,
+                                                                           final boolean phyloxml_validate_against_xsd )
+            throws FileNotFoundException, IOException {
+        PhylogenyParser parser = null;
+        final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
+        if ( first_line.startsWith( "<" ) ) {
+            parser = new PhyloXmlParser();
+            if ( phyloxml_validate_against_xsd ) {
+                final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                if ( xsd_url != null ) {
+                    ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                }
+                else {
+                    if ( ForesterConstants.RELEASE ) {
+                        throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
+                                + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
+                    }
+                }
+            }
+        }
+        else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+                || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+            parser = new NexusPhylogeniesParser();
+        }
+        else {
+            parser = new NHXParser();
+        }
+        return parser;
+    }
+
+    final public static PhylogenyParser createParserDependingOnFileType( final File file,
+                                                                         final boolean phyloxml_validate_against_xsd )
+            throws FileNotFoundException, IOException {
+        PhylogenyParser parser = null;
+        parser = createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
+        if ( parser == null ) {
+            parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
+        }
+        return parser;
+    }
+
+    /**
+     * Returns null if it cannot guess the parser to use based on the file name suffix.
+     *
+     * @param filename
+     * @return a suitable PhylogenyParser, or null if none can be inferred from the suffix
+     */
+    final public static PhylogenyParser createParserDependingOnSuffix( final String filename,
+                                                                       final boolean phyloxml_validate_against_xsd ) {
+        PhylogenyParser parser = null;
+        final String filename_lc = filename.toLowerCase();
+        if ( filename_lc.endsWith( ".tol" ) || filename_lc.endsWith( ".tolxml" ) || filename_lc.endsWith( ".tol.zip" ) ) {
+            parser = new TolParser();
+        }
+        else if ( filename_lc.endsWith( ".xml" ) || filename_lc.endsWith( ".px" ) || filename_lc.endsWith( "phyloxml" )
+                || filename_lc.endsWith( ".zip" ) ) {
+            parser = new PhyloXmlParser();
+            if ( phyloxml_validate_against_xsd ) {
+                final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                if ( xsd_url != null ) {
+                    ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                }
+                else {
+                    if ( ForesterConstants.RELEASE ) {
+                        throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
+                                + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
+                    }
+                }
+            }
+        }
+        else if ( filename_lc.endsWith( ".nexus" ) || filename_lc.endsWith( ".nex" ) || filename_lc.endsWith( ".nx" ) ) {
+            parser = new NexusPhylogeniesParser();
+        }
+        else if ( filename_lc.endsWith( ".nhx" ) || filename_lc.endsWith( ".nh" ) || filename_lc.endsWith( ".newick" ) ) {
+            parser = new NHXParser();
+        }
+        return parser;
+    }
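+
+    // For reference, the suffix-based selection above: .tol, .tolxml, .tol.zip -> TolParser;
+    // .xml, .px, *phyloxml, .zip -> PhyloXmlParser; .nexus, .nex, .nx -> NexusPhylogeniesParser;
+    // .nhx, .nh, .newick -> NHXParser; any other suffix -> null.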
+
+    final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
+                                                                            final boolean phyloxml_validate_against_xsd )
+            throws FileNotFoundException, IOException {
+        final String lc_filename = url.getFile().toString().toLowerCase();
+        PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
+        if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
+            if ( parser instanceof PhyloXmlParser ) {
+                ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
+            }
+            else if ( parser instanceof TolParser ) {
+                ( ( TolParser ) parser ).setZippedInputstream( true );
+            }
+        }
+        if ( parser == null ) {
+            final String first_line = getFirstLine( url ).trim().toLowerCase();
+            if ( first_line.startsWith( "<" ) ) {
+                parser = new PhyloXmlParser();
+                if ( phyloxml_validate_against_xsd ) {
+                    final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                    final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                    if ( xsd_url != null ) {
+                        ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                    }
+                    else {
+                        throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
+                                + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
+                    }
+                }
+            }
+            else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+                    || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+                parser = new NexusPhylogeniesParser();
+            }
+            else {
+                parser = new NHXParser();
+            }
+        }
+        return parser;
+    }
+
+    final public static void ensurePresenceOfDate( final PhylogenyNode node ) {
+        if ( !node.getNodeData().isHasDate() ) {
+            node.getNodeData().setDate( new org.forester.phylogeny.data.Date() );
+        }
+    }
+
+    final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) {
+        if ( !node.getNodeData().isHasDistribution() ) {
+            node.getNodeData().setDistribution( new Distribution( "" ) );
+        }
+    }
+
+    public static void ensurePresenceOfSequence( final PhylogenyNode node ) {
+        if ( !node.getNodeData().isHasSequence() ) {
+            node.getNodeData().setSequence( new Sequence() );
+        }
+    }
+
+    public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) {
+        if ( !node.getNodeData().isHasTaxonomy() ) {
+            node.getNodeData().setTaxonomy( new Taxonomy() );
+        }
+    }
+
+    /**
+     * Extracts a taxonomy code if and only if:
+     * there is one and only one '_',
+     * the name is shorter than 25 characters,
+     * there is no '|',
+     * there is no '.',
+     * if a '/' is present it has to be after the '_',
+     * if PFAM_STYLE_ONLY: a '/' must be present,
+     * the tax code can only contain uppercase letters and numbers,
+     * and must contain at least one uppercase letter.
+     * Returns null if no code is extractable.
+     *
+     * @param name
+     * @param limit_to_five
+     * @param taxonomy_extraction
+     * @return the extracted taxonomy code, or null if none is extractable
+     */
+    public static String extractTaxonomyCodeFromNodeName( final String name,
+                                                          final boolean limit_to_five,
+                                                          final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction ) {
+        if ( ( name.indexOf( "_" ) > 0 )
+                && ( name.length() < 25 )
+                && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) )
+                && ( name.indexOf( "|" ) < 0 )
+                && ( name.indexOf( "." ) < 0 )
+                && ( ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name
+                        .indexOf( "/" ) >= 0 ) )
+                && ( ( ( name.indexOf( "/" ) ) < 0 ) || ( name.indexOf( "/" ) > name.indexOf( "_" ) ) ) ) {
+            final String[] s = name.split( "[_/]" );
+            if ( s.length > 1 ) {
+                String str = s[ 1 ];
+                if ( limit_to_five ) {
+                    if ( str.length() > 5 ) {
+                        str = str.substring( 0, 5 );
+                    }
+                    else if ( ( str.length() < 5 ) && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) ) ) {
+                        str = str.substring( 0, 3 );
+                    }
+                }
+                final Matcher letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str );
+                if ( !letters_and_numbers.matches() ) {
+                    return null;
+                }
+                final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str );
+                if ( numbers_only.matches() ) {
+                    return null;
+                }
+                return str;
+            }
+        }
+        return null;
+    }
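+
+    // Example for extractTaxonomyCodeFromNodeName (illustrative): "BCL2_HUMAN/1-120"
+    // yields "HUMAN" (the token between '_' and '/'), whereas names containing '|' or '.'
+    // (e.g. "BCL2|HUMAN") yield null.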
+
+    public static void fatalError( final String prg_name, final String message ) {
+        System.err.println();
+        System.err.println( "[" + prg_name + "] > " + message );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    public static String[] file2array( final File file ) throws IOException {
+        final List<String> list = file2list( file );
+        return list.toArray( new String[ list.size() ] );
+    }
+
+    final public static List<String> file2list( final File file ) throws IOException {
+        final List<String> list = new ArrayList<String>();
+        final BufferedReader in = new BufferedReader( new FileReader( file ) );
+        String str;
+        while ( ( str = in.readLine() ) != null ) {
+            str = str.trim();
+            if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) {
+                for( final String s : splitString( str ) ) {
+                    list.add( s );
+                }
+            }
+        }
+        in.close();
+        return list;
+    }
+
+    final public static SortedSet<String> file2set( final File file ) throws IOException {
+        final SortedSet<String> set = new TreeSet<String>();
+        final BufferedReader in = new BufferedReader( new FileReader( file ) );
+        String str;
+        while ( ( str = in.readLine() ) != null ) {
+            str = str.trim();
+            if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) {
+                for( final String s : splitString( str ) ) {
+                    set.add( s );
+                }
+            }
+        }
+        in.close();
+        return set;
+    }
+
+    final public static String getCurrentDateTime() {
+        final DateFormat format = new SimpleDateFormat( "yyyy/MM/dd HH:mm:ss" );
+        return format.format( new Date() );
+    }
+
+    final public static String getFileSeparator() {
+        return ForesterUtil.FILE_SEPARATOR;
+    }
+
+    final public static String getFirstLine( final Object source ) throws FileNotFoundException, IOException {
+        BufferedReader reader = null;
+        if ( source instanceof File ) {
+            final File f = ( File ) source;
+            if ( !f.exists() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
+            }
+            else if ( !f.isFile() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
+            }
+            else if ( !f.canRead() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
+            }
+            reader = new BufferedReader( new FileReader( f ) );
+        }
+        else if ( source instanceof InputStream ) {
+            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+        }
+        else if ( source instanceof String ) {
+            reader = new BufferedReader( new StringReader( ( String ) source ) );
+        }
+        else if ( source instanceof StringBuffer ) {
+            reader = new BufferedReader( new StringReader( source.toString() ) );
+        }
+        else if ( source instanceof URL ) {
+            reader = new BufferedReader( new InputStreamReader( ( ( URL ) source ).openStream() ) );
+        }
+        else {
+            throw new IllegalArgumentException( "dont know how to read [" + source.getClass() + "]" );
+        }
+        String line;
+        while ( ( line = reader.readLine() ) != null ) {
+            line = line.trim();
+            if ( !ForesterUtil.isEmpty( line ) ) {
+                if ( reader != null ) {
+                    reader.close();
+                }
+                return line;
+            }
+        }
+        if ( reader != null ) {
+            reader.close();
+        }
+        return line;
+    }
+
+    final public static String getLineSeparator() {
+        return ForesterUtil.LINE_SEPARATOR;
+    }
+
+    /**
+     * Returns all custom data tag names of this Phylogeny as Hashtable. Tag
+     * names are keys, values are Boolean set to false.
+     */
+    final public static Hashtable<String, Boolean> getPropertyRefs( final Phylogeny phylogeny ) {
+        final Hashtable<String, Boolean> ht = new Hashtable<String, Boolean>();
+        if ( phylogeny.isEmpty() ) {
+            return ht;
+        }
+        for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode current_node = iter.next();
+            if ( current_node.getNodeData().isHasProperties() ) {
+                final String[] tags = current_node.getNodeData().getProperties().getPropertyRefs();
+                for( int i = 0; i < tags.length; ++i ) {
+                    ht.put( tags[ i ], Boolean.FALSE );
+                }
+            }
+        }
+        return ht;
+    }
+
+    final public static void increaseCountingMap( final Map<String, Integer> counting_map, final String item_name ) {
+        if ( !counting_map.containsKey( item_name ) ) {
+            counting_map.put( item_name, 1 );
+        }
+        else {
+            counting_map.put( item_name, counting_map.get( item_name ) + 1 );
+        }
+    }
+
+    final static public boolean isAllNonEmptyInternalLabelsArePositiveNumbers( final Phylogeny phy ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            if ( !n.isRoot() && !n.isExternal() ) {
+                if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                    double d = -1.0;
+                    try {
+                        d = Double.parseDouble( n.getName() );
+                    }
+                    catch ( final Exception e ) {
+                        d = -1.0;
+                    }
+                    if ( d < 0.0 ) {
+                        return false;
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    final public static boolean isEmpty( final String s ) {
+        return ( ( s == null ) || ( s.length() < 1 ) );
+    }
+
+    final public static boolean isEqual( final double a, final double b ) {
+        return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
+    }
+
+    final public static boolean isEven( final int n ) {
+        return n % 2 == 0;
+    }
+
+    final static public boolean isHasAtLeastNodeWithEvent( final Phylogeny phy ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            if ( it.next().getNodeData().isHasEvent() ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns true if at least one branch has a length larger than zero.
+     * 
+     * 
+     * @param phy
+     */
+    final static public boolean isHasAtLeastOneBranchLengthLargerThanZero( final Phylogeny phy ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            if ( it.next().getDistanceToParent() > 0.0 ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    final static public boolean isHasAtLeastOneBranchWithSupportValues( final Phylogeny phy ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            if ( it.next().getBranchData().isHasConfidences() ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * This determines whether String[] a and String[] b have at least one
+     * String in common (intersect). Returns false if at least one String[] is
+     * null or empty.
+     *
+     * @param a
+     *            a String[]
+     * @param b
+     *            a String[]
+     * @return true if a and b are both non-null, non-empty, and contain at
+     *         least one element in common; false otherwise
+     */
+    final public static boolean isIntersecting( final String[] a, final String[] b ) {
+        if ( ( a == null ) || ( b == null ) ) {
+            return false;
+        }
+        if ( ( a.length < 1 ) || ( b.length < 1 ) ) {
+            return false;
+        }
+        for( int i = 0; i < a.length; ++i ) {
+            final String ai = a[ i ];
+            for( int j = 0; j < b.length; ++j ) {
+                if ( ( ai != null ) && ( b[ j ] != null ) && ai.equals( b[ j ] ) ) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    final public static double isLargerOrEqualToZero( final double d ) {
+        if ( d > 0.0 ) {
+            return d;
+        }
+        else {
+            return 0.0;
+        }
+    }
+
+    final public static boolean isNull( final BigDecimal s ) {
+        return ( ( s == null ) || ( s.compareTo( NULL_BD ) == 0 ) );
+    }
+
+    final public static String isReadableFile( final File f ) {
+        if ( !f.exists() ) {
+            return "file [" + f + "] does not exist";
+        }
+        if ( f.isDirectory() ) {
+            return "[" + f + "] is a directory";
+        }
+        if ( !f.isFile() ) {
+            return "[" + f + "] is not a file";
+        }
+        if ( !f.canRead() ) {
+            return "file [" + f + "] is not readable";
+        }
+        if ( f.length() < 1 ) {
+            return "file [" + f + "] is empty";
+        }
+        return null;
+    }
+
+    final public static String isReadableFile( final String s ) {
+        return isReadableFile( new File( s ) );
+    }
+
+    final public static String isWritableFile( final File f ) {
+        if ( f.isDirectory() ) {
+            return "[" + f + "] is a directory";
+        }
+        if ( f.exists() ) {
+            return "[" + f + "] already exists";
+        }
+        return null;
+    }
+
+    /**
+     * Helper for method "stringToColor".
+     * <p>
+     * (Last modified: 12/20/03)
+     */
+    final public static int limitRangeForColor( int i ) {
+        if ( i > 255 ) {
+            i = 255;
+        }
+        else if ( i < 0 ) {
+            i = 0;
+        }
+        return i;
+    }
+
+    final public static SortedMap<Object, Integer> listToSortedCountsMap( final List list ) {
+        final SortedMap<Object, Integer> map = new TreeMap<Object, Integer>();
+        for( final Object key : list ) {
+            if ( !map.containsKey( key ) ) {
+                map.put( key, 1 );
+            }
+            else {
+                map.put( key, map.get( key ) + 1 );
+            }
+        }
+        return map;
+    }
+
+    final public static StringBuffer mapToStringBuffer( final Map map, final String key_value_separator ) {
+        final StringBuffer sb = new StringBuffer();
+        for( final Iterator iter = map.keySet().iterator(); iter.hasNext(); ) {
+            final Object key = iter.next();
+            sb.append( key.toString() );
+            sb.append( key_value_separator );
+            sb.append( map.get( key ).toString() );
+            sb.append( ForesterUtil.getLineSeparator() );
+        }
+        return sb;
+    }
+
+    final public static String normalizeString( final String s,
+                                                final int length,
+                                                final boolean left_pad,
+                                                final char pad_char ) {
+        if ( s.length() > length ) {
+            return s.substring( 0, length );
+        }
+        else {
+            final StringBuffer pad = new StringBuffer( length - s.length() );
+            for( int i = 0; i < ( length - s.length() ); ++i ) {
+                pad.append( pad_char );
+            }
+            if ( left_pad ) {
+                return pad + s;
+            }
+            else {
+                return s + pad;
+            }
+        }
+    }
+
+    final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException {
+        BufferedReader reader = null;
+        if ( source instanceof File ) {
+            final File f = ( File ) source;
+            if ( !f.exists() ) {
+                throw new IOException( "\"" + f.getAbsolutePath() + "\" does not exist" );
+            }
+            else if ( !f.isFile() ) {
+                throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a file" );
+            }
+            else if ( !f.canRead() ) {
+                throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a readable" );
+            }
+            reader = new BufferedReader( new FileReader( f ) );
+        }
+        else if ( source instanceof InputStream ) {
+            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+        }
+        else if ( source instanceof String ) {
+            reader = new BufferedReader( new StringReader( ( String ) source ) );
+        }
+        else if ( source instanceof StringBuffer ) {
+            reader = new BufferedReader( new StringReader( source.toString() ) );
+        }
+        else {
+            throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
+                    + "] (can only parse objects of type File, InputStream, String, or StringBuffer)" );
+        }
+        return reader;
+    }
+
+    final public static StringBuffer pad( final double number, final int size, final char pad, final boolean left_pad ) {
+        return pad( new StringBuffer( number + "" ), size, pad, left_pad );
+    }
+
+    final public static StringBuffer pad( final String string, final int size, final char pad, final boolean left_pad ) {
+        return pad( new StringBuffer( string ), size, pad, left_pad );
+    }
+
+    final public static StringBuffer pad( final StringBuffer string,
+                                          final int size,
+                                          final char pad,
+                                          final boolean left_pad ) {
+        final StringBuffer padding = new StringBuffer();
+        final int s = size - string.length();
+        if ( s < 1 ) {
+            return new StringBuffer( string.substring( 0, size ) );
+        }
+        for( int i = 0; i < s; ++i ) {
+            padding.append( pad );
+        }
+        if ( left_pad ) {
+            return padding.append( string );
+        }
+        else {
+            return string.append( padding );
+        }
+    }
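+
+    // Example for the pad methods (illustrative): pad( "7.5", 6, '0', true ) yields
+    // "0007.5" and pad( "7.5", 6, '0', false ) yields "7.5000"; input longer than
+    // size is truncated to its first size characters.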
+
+    final public static double parseDouble( final String str ) throws ParseException {
+        if ( ForesterUtil.isEmpty( str ) ) {
+            return 0.0;
+        }
+        return Double.parseDouble( str );
+    }
+
+    final public static int parseInt( final String str ) throws ParseException {
+        if ( ForesterUtil.isEmpty( str ) ) {
+            return 0;
+        }
+        return Integer.parseInt( str );
+    }
+
+    final public static void postOrderRelabelInternalNodes( final Phylogeny phylogeny, final int starting_number ) {
+        int i = starting_number;
+        for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+            final PhylogenyNode node = it.next();
+            if ( !node.isExternal() ) {
+                node.setName( String.valueOf( i++ ) );
+            }
+        }
+    }
+
+    final public static void printArray( final Object[] a ) {
+        for( int i = 0; i < a.length; ++i ) {
+            System.out.println( "[" + i + "]=" + a[ i ] );
+        }
+    }
+
+    final public static void printCountingMap( final Map<String, Integer> counting_map ) {
+        for( final String key : counting_map.keySet() ) {
+            System.out.println( key + ": " + counting_map.get( key ) );
+        }
+    }
+
+    final public static void printErrorMessage( final String prg_name, final String message ) {
+        System.out.println( "[" + prg_name + "] > error: " + message );
+    }
+
+    final public static void printProgramInformation( final String prg_name, final String prg_version, final String date ) {
+        final int l = prg_name.length() + prg_version.length() + date.length() + 4;
+        System.out.println();
+        System.out.println( prg_name + " " + prg_version + " (" + date + ")" );
+        for( int i = 0; i < l; ++i ) {
+            System.out.print( "_" );
+        }
+        System.out.println();
+    }
+
+    final public static void printProgramInformation( final String prg_name,
+                                                      final String prg_version,
+                                                      final String date,
+                                                      final String email,
+                                                      final String www ) {
+        final int l = prg_name.length() + prg_version.length() + date.length() + 4;
+        System.out.println();
+        System.out.println( prg_name + " " + prg_version + " (" + date + ")" );
+        for( int i = 0; i < l; ++i ) {
+            System.out.print( "_" );
+        }
+        System.out.println();
+        System.out.println();
+        System.out.println( "WWW    : " + www );
+        System.out.println( "Contact: " + email );
+        if ( !ForesterUtil.isEmpty( ForesterUtil.JAVA_VERSION ) && !ForesterUtil.isEmpty( ForesterUtil.JAVA_VENDOR ) ) {
+            System.out.println();
+            System.out.println( "[running on Java " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" );
+        }
+        System.out.println();
+    }
+
+    final public static void printWarningMessage( final String prg_name, final String message ) {
+        System.out.println( "[" + prg_name + "] > warning: " + message );
+    }
+
+    final public static void programMessage( final String prg_name, final String message ) {
+        System.out.println( "[" + prg_name + "] > " + message );
+    }
+
+    final public static String removeSuffix( final String file_name ) {
+        final int i = file_name.lastIndexOf( '.' );
+        if ( i > 1 ) {
+            return file_name.substring( 0, i );
+        }
+        return file_name;
+    }
+
+    /**
+     * Removes all white space from String s.
+     * 
+     * @return String s with white space removed
+     */
+    final public static String removeWhiteSpace( String s ) {
+        int i;
+        for( i = 0; i <= s.length() - 1; i++ ) {
+            if ( ( s.charAt( i ) == ' ' ) || ( s.charAt( i ) == '\t' ) || ( s.charAt( i ) == '\n' )
+                    || ( s.charAt( i ) == '\r' ) ) {
+                s = s.substring( 0, i ) + s.substring( i + 1 );
+                i--;
+            }
+        }
+        return s;
+    }
+
+    final public static boolean isContainsParanthesesableNhCharacter( final String nh ) {
+        return PARANTHESESABLE_NH_CHARS_PATTERN.matcher( nh ).find();
+    }
+
+    final public static String replaceIllegalNhCharacters( final String nh ) {
+        if ( nh == null ) {
+            return "";
+        }
+        return nh.trim().replaceAll( "[\\[\\]:]+", "_" );
+    }
+
+    final public static String replaceIllegalNhxCharacters( final String nhx ) {
+        if ( nhx == null ) {
+            return "";
+        }
+        return nhx.trim().replaceAll( "[\\[\\](),:;\\s]+", "_" );
+    }
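+    // Illustrative examples for the two methods above:
+    //   replaceIllegalNhCharacters( "a[b]:c" )    -> "a_b_c"
+    //   replaceIllegalNhxCharacters( "a (b), c" ) -> "a_b_c"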
+
+    final public static double round( final double value, final int decimal_place ) {
+        BigDecimal bd = new BigDecimal( value );
+        bd = bd.setScale( decimal_place, BigDecimal.ROUND_HALF_UP );
+        return bd.doubleValue();
+    }
+
+    /**
+     * Rounds d to an int.
+     */
+    final public static int roundToInt( final double d ) {
+        return ( int ) ( d + 0.5 );
+    }
+
+    final public static int roundToInt( final float f ) {
+        return ( int ) ( f + 0.5f );
+    }
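+    // Note on the two roundToInt methods above: the cast truncates toward zero,
+    // so roundToInt( 2.5 ) returns 3 and roundToInt( 2.4 ) returns 2, while
+    // roundToInt( -2.7 ) returns -2 (Math.round( -2.7 ) would return -3); they
+    // appear intended for non-negative input.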
+
+    final public static String sanitizeString( final String s ) {
+        if ( s == null ) {
+            return "";
+        }
+        else {
+            return s.trim();
+        }
+    }
+
+    final private static String[] splitString( final String str ) {
+        final String regex = "[\\s;,]+";
+        return str.split( regex );
+    }
+
+    final public static String stringArrayToString( final String[] a ) {
+        final StringBuffer sb = new StringBuffer();
+        if ( ( a != null ) && ( a.length > 0 ) ) {
+            for( int i = 0; i < a.length - 1; ++i ) {
+                sb.append( a[ i ] + ", " );
+            }
+            sb.append( a[ a.length - 1 ] );
+        }
+        return sb.toString();
+    }
+
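+    // Converts numeric internal node names into bootstrap support values: every
+    // parsable, non-negative internal name becomes a Confidence of type
+    // "bootstrap" and the name is cleared; a non-numeric internal name causes an
+    // IllegalArgumentException.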
+    final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) {
+                double value = -1;
+                try {
+                    value = Double.parseDouble( n.getName() );
+                }
+                catch ( final NumberFormatException e ) {
+                    throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: "
+                            + e.getLocalizedMessage() );
+                }
+                if ( value >= 0.0 ) {
+                    n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) );
+                    n.setName( "" );
+                }
+            }
+        }
+    }
+
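+    // As above, but skips the root and nodes that already carry confidence
+    // values, silently ignores names that cannot be parsed as numbers, and adds
+    // the Confidence with an empty type string.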
+    final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            if ( !n.isRoot() && !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+                if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                    double d = -1.0;
+                    try {
+                        d = Double.parseDouble( n.getName() );
+                    }
+                    catch ( final Exception e ) {
+                        d = -1.0;
+                    }
+                    if ( d >= 0.0 ) {
+                        n.getBranchData().addConfidence( new Confidence( d, "" ) );
+                        n.setName( "" );
+                    }
+                }
+            }
+        }
+    }
+
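+    // Moves each non-empty node name into the requested field (taxonomy code,
+    // scientific or common name, sequence symbol or name), creating the Taxonomy
+    // or Sequence object on demand and clearing the name; for CLADE_NAME the
+    // switch has no case, so names are left untouched.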
+    final static public void transferNodeNameToField( final Phylogeny phy, final PhylogenyNodeField field ) {
+        final PhylogenyNodeIterator it = phy.iteratorPostorder();
+        while ( it.hasNext() ) {
+            final PhylogenyNode n = it.next();
+            final String name = n.getName().trim();
+            if ( !ForesterUtil.isEmpty( name ) ) {
+                switch ( field ) {
+                    case TAXONOMY_CODE:
+                        //temp hack
+                        //                        if ( name.length() > 5 ) {
+                        //                            n.setName( "" );
+                        //                            if ( !n.getNodeData().isHasTaxonomy() ) {
+                        //                                n.getNodeData().setTaxonomy( new Taxonomy() );
+                        //                            }
+                        //                            n.getNodeData().getTaxonomy().setScientificName( name );
+                        //                            break;
+                        //                        }
+                        //
+                        n.setName( "" );
+                        PhylogenyMethods.setTaxonomyCode( n, name );
+                        break;
+                    case TAXONOMY_SCIENTIFIC_NAME:
+                        n.setName( "" );
+                        if ( !n.getNodeData().isHasTaxonomy() ) {
+                            n.getNodeData().setTaxonomy( new Taxonomy() );
+                        }
+                        n.getNodeData().getTaxonomy().setScientificName( name );
+                        break;
+                    case TAXONOMY_COMMON_NAME:
+                        n.setName( "" );
+                        if ( !n.getNodeData().isHasTaxonomy() ) {
+                            n.getNodeData().setTaxonomy( new Taxonomy() );
+                        }
+                        n.getNodeData().getTaxonomy().setCommonName( name );
+                        break;
+                    case SEQUENCE_SYMBOL:
+                        n.setName( "" );
+                        if ( !n.getNodeData().isHasSequence() ) {
+                            n.getNodeData().setSequence( new Sequence() );
+                        }
+                        n.getNodeData().getSequence().setSymbol( name );
+                        break;
+                    case SEQUENCE_NAME:
+                        n.setName( "" );
+                        if ( !n.getNodeData().isHasSequence() ) {
+                            n.getNodeData().setSequence( new Sequence() );
+                        }
+                        n.getNodeData().getSequence().setName( name );
+                        break;
+                }
+            }
+        }
+    }
+
+    final public static void unexpectedFatalError( final String prg_name, final Exception e ) {
+        System.err.println();
+        System.err.println( "[" + prg_name
+                + "] > unexpected error (Should not have occured! Please contact program author(s).)" );
+        e.printStackTrace( System.err );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    final public static void unexpectedFatalError( final String prg_name, final String message ) {
+        System.err.println();
+        System.err.println( "[" + prg_name
+                + "] > unexpected error. Should not have occured! Please contact program author(s)." );
+        System.err.println( message );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) {
+        System.err.println();
+        System.err.println( "[" + prg_name
+                + "] > unexpected error. Should not have occured! Please contact program author(s)." );
+        System.err.println( message );
+        e.printStackTrace( System.err );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    public final static String wordWrap( final String str, final int width ) {
+        final StringBuilder sb = new StringBuilder( str );
+        int start = 0;
+        int ls = -1;
+        int i = 0;
+        while ( i < sb.length() ) {
+            if ( sb.charAt( i ) == ' ' ) {
+                ls = i;
+            }
+            if ( sb.charAt( i ) == '\n' ) {
+                ls = -1;
+                start = i + 1;
+            }
+            if ( i > start + width - 1 ) {
+                if ( ls != -1 ) {
+                    sb.setCharAt( ls, '\n' );
+                    start = ls + 1;
+                    ls = -1;
+                }
+                else {
+                    sb.insert( i, '\n' );
+                    start = i + 1;
+                }
+            }
+            i++;
+        }
+        return sb.toString();
+    }
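+    // Illustrative example: wordWrap( "the quick brown fox", 10 ) returns
+    // "the quick\nbrown fox"; existing '\n' characters reset the line length
+    // counter, and single words longer than 'width' are hard-broken.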
+
+    public static enum PhylogenyNodeField {
+        CLADE_NAME, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, TAXONOMY_COMMON_NAME, SEQUENCE_SYMBOL, SEQUENCE_NAME;
+    }
+
+    public static enum TAXONOMY_EXTRACTION {
+        NO, YES, PFAM_STYLE_ONLY;
+    }
+}
diff --git a/forester/java/src/org/forester/util/GeneralTable.java b/forester/java/src/org/forester/util/GeneralTable.java
new file mode 100644 (file)
index 0000000..7b0a655
--- /dev/null
@@ -0,0 +1,139 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org
+
+package org.forester.util;
+
+import java.text.NumberFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+public class GeneralTable<IDENTIFIER_TYPE, VALUE_TYPE> {
+
+    private Map<IDENTIFIER_TYPE, Map<IDENTIFIER_TYPE, VALUE_TYPE>> _rows;
+    private SortedSet<IDENTIFIER_TYPE>                             _row_identifiers;
+    private SortedSet<IDENTIFIER_TYPE>                             _column_identifiers;
+
+    public GeneralTable() {
+        init();
+    }
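+
+    // Usage sketch (illustrative):
+    //   final GeneralTable<String, Double> t = new GeneralTable<String, Double>();
+    //   t.setValue( "col_1", "row_a", 1.5 );
+    //   t.getValue( "col_1", "row_a" );          // -> 1.5
+    //   t.getValueAsString( "col_2", "row_a" );  // -> "" (unset cells render as empty strings)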
+
+    public SortedSet<IDENTIFIER_TYPE> getColumnIdentifiers() {
+        return _column_identifiers;
+    }
+
+    private Map<IDENTIFIER_TYPE, VALUE_TYPE> getRow( final IDENTIFIER_TYPE row ) {
+        return getRows().get( row );
+    }
+
+    public SortedSet<IDENTIFIER_TYPE> getRowIdentifiers() {
+        return _row_identifiers;
+    }
+
+    private Map<IDENTIFIER_TYPE, Map<IDENTIFIER_TYPE, VALUE_TYPE>> getRows() {
+        return _rows;
+    }
+
+    public VALUE_TYPE getValue( final IDENTIFIER_TYPE col, final IDENTIFIER_TYPE row ) throws IllegalArgumentException {
+        final Map<IDENTIFIER_TYPE, VALUE_TYPE> row_map = getRow( row );
+        if ( ( row_map == null ) || ( row_map.size() < 1 ) ) {
+            return null;
+        }
+        return row_map.get( col );
+    }
+
+    public String getValueAsString( final IDENTIFIER_TYPE col, final IDENTIFIER_TYPE row )
+            throws IllegalArgumentException {
+        final VALUE_TYPE value = getValue( col, row );
+        return ( value == null ? "" : getValue( col, row ).toString() );
+    }
+
+    private void init() {
+        _rows = new HashMap<IDENTIFIER_TYPE, Map<IDENTIFIER_TYPE, VALUE_TYPE>>();
+        _row_identifiers = new TreeSet<IDENTIFIER_TYPE>();
+        _column_identifiers = new TreeSet<IDENTIFIER_TYPE>();
+    }
+
+    public void setValue( final IDENTIFIER_TYPE col, final IDENTIFIER_TYPE row, final VALUE_TYPE value ) {
+        getColumnIdentifiers().add( col );
+        getRowIdentifiers().add( row );
+        Map<IDENTIFIER_TYPE, VALUE_TYPE> row_map = null;
+        if ( getRows().containsKey( row ) ) {
+            row_map = getRows().get( row );
+        }
+        else {
+            row_map = new HashMap<IDENTIFIER_TYPE, VALUE_TYPE>();
+            getRows().put( row, row_map );
+        }
+        row_map.put( col, value );
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( "\t" );
+        for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) {
+            sb.append( col.toString() );
+            sb.append( "\t" );
+        }
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        for( final IDENTIFIER_TYPE row : getRowIdentifiers() ) {
+            sb.append( row.toString() );
+            sb.append( "\t" );
+            for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) {
+                sb.append( getValueAsString( col, row ) );
+                sb.append( "\t" );
+            }
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        return sb.toString();
+    }
+
+    public String toString( final NumberFormat number_format ) {
+        final StringBuilder sb = new StringBuilder();
+        sb.append( "\t" );
+        for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) {
+            sb.append( col.toString() );
+            sb.append( "\t" );
+        }
+        sb.append( ForesterUtil.LINE_SEPARATOR );
+        for( final IDENTIFIER_TYPE row : getRowIdentifiers() ) {
+            sb.append( row.toString() );
+            sb.append( "\t" );
+            for( final IDENTIFIER_TYPE col : getColumnIdentifiers() ) {
+                try {
+                    sb.append( number_format.format( getValue( col, row ) ) );
+                }
+                catch ( final IllegalArgumentException e ) {
+                    sb.append( getValueAsString( col, row ) );
+                }
+                sb.append( "\t" );
+            }
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        return sb.toString();
+    }
+}
\ No newline at end of file
diff --git a/forester/java/src/org/forester/util/IllegalFormatUseException.java b/forester/java/src/org/forester/util/IllegalFormatUseException.java
new file mode 100644 (file)
index 0000000..7b3704b
--- /dev/null
@@ -0,0 +1,42 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+// 
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+public class IllegalFormatUseException extends IllegalArgumentException {
+
+    /**
+     * 
+     */
+    private static final long serialVersionUID = -1126329548396073983L;
+
+    public IllegalFormatUseException() {
+        super();
+    }
+
+    public IllegalFormatUseException( final String message ) {
+        super( message );
+    }
+}
diff --git a/forester/java/src/org/forester/util/SystemCommandExecutor.java b/forester/java/src/org/forester/util/SystemCommandExecutor.java
new file mode 100644 (file)
index 0000000..5b32dc4
--- /dev/null
@@ -0,0 +1,154 @@
+// $Id:
+/**
+ * This class can be used to execute a system command from a Java application.
+ * See the documentation for the public methods of this class for more
+ * information.
+ * 
+ * Documentation for this class is available at this URL:
+ * 
+ * http://devdaily.com/java/java-processbuilder-process-system-exec
+ * 
+ * 
+ * Copyright 2010 alvin j. alexander, devdaily.com.
+ * 
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ * 
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ * 
+ * Please see the following page for the LGPL license:
+ * http://www.gnu.org/licenses/lgpl.txt
+ * 
+ */
+
+package org.forester.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+
+public class SystemCommandExecutor {
+
+    private final List<String>    commandInformation;
+    private final String          adminPassword;
+    private ThreadedStreamHandler inputStreamHandler;
+    private ThreadedStreamHandler errorStreamHandler;
+
+    /**
+     * Pass in the system command you want to run as a List of Strings, as shown here:
+     * 
+     * List<String> commands = new ArrayList<String>();
+     * commands.add("/sbin/ping");
+     * commands.add("-c");
+     * commands.add("5");
+     * commands.add("www.google.com");
+     * SystemCommandExecutor commandExecutor = new SystemCommandExecutor(commands);
+     * commandExecutor.executeCommand();
+     * 
+     * Note: I've removed the other constructor that was here to support executing
+     *       the sudo command. I'll add that back in when I get the sudo command
+     *       working to the point where it won't hang when the given password is
+     *       wrong.
+     *
+     * @param commandInformation The command you want to run.
+     */
+    public SystemCommandExecutor( final List<String> commandInformation ) {
+        if ( ( commandInformation == null ) || commandInformation.isEmpty() ) {
+            throw new IllegalArgumentException( "The commandInformation is required." );
+        }
+        checkCmdFile( new File( commandInformation.get( 0 ) ) );
+        this.commandInformation = commandInformation;
+        adminPassword = null;
+    }
+
+    public static boolean isExecuteableFile( final File path_to_cmd_f ) {
+        if ( !path_to_cmd_f.exists() ) {
+            return false;
+        }
+        else if ( path_to_cmd_f.isDirectory() ) {
+            return false;
+        }
+        else if ( !path_to_cmd_f.canExecute() ) {
+            return false;
+        }
+        return true;
+    }
+
+    private void checkCmdFile( final File path_to_cmd_f ) {
+        if ( !path_to_cmd_f.exists() ) {
+            throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] does not exist" );
+        }
+        else if ( path_to_cmd_f.isDirectory() ) {
+            throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is a directory" );
+        }
+        else if ( !path_to_cmd_f.canExecute() ) {
+            throw new IllegalArgumentException( "[" + path_to_cmd_f.getAbsolutePath() + "] is not executeable" );
+        }
+    }
+
+    public int executeCommand() throws IOException, InterruptedException {
+        final ProcessBuilder pb = new ProcessBuilder( commandInformation );
+        final Process process = pb.start();
+        // The output stream of the process is needed if something has to be written
+        // to the command's standard input (such as a password when invoking 'sudo').
+        final OutputStream stdOutput = process.getOutputStream();
+        // The streams are obtained on separate lines in case they need to be set to
+        // null to get the handler threads to stop; see
+        // http://java.sun.com/j2se/1.5.0/docs/guide/misc/threadPrimitiveDeprecation.html
+        final InputStream inputStream = process.getInputStream();
+        final InputStream errorStream = process.getErrorStream();
+        // Standard output and standard error are drained in separate threads so that
+        // the child process cannot block once its output buffers fill up. The input
+        // stream handler gets a reference to stdOutput in case something needs to be
+        // written to it, such as with the sudo command.
+        inputStreamHandler = new ThreadedStreamHandler( inputStream, stdOutput, adminPassword );
+        errorStreamHandler = new ThreadedStreamHandler( errorStream );
+        // TODO the inputStreamHandler has a nasty side-effect of hanging if the given password is wrong; fix it
+        inputStreamHandler.start();
+        errorStreamHandler.start();
+        // TODO a better way to do this?
+        final int exitValue = process.waitFor();
+        inputStreamHandler.interrupt();
+        errorStreamHandler.interrupt();
+        inputStreamHandler.join();
+        errorStreamHandler.join();
+        // IOExceptions from pb.start() and InterruptedExceptions from waitFor()
+        // and join() propagate to the caller, as declared.
+        return exitValue;
+    }
+
+    /**
+     * Get the standard error (stderr) from the command you just exec'd.
+     */
+    public StringBuilder getStandardErrorFromCommand() {
+        return errorStreamHandler.getOutputBuffer();
+    }
+
+    /**
+     * Get the standard output (stdout) from the command you just exec'd.
+     */
+    public StringBuilder getStandardOutputFromCommand() {
+        return inputStreamHandler.getOutputBuffer();
+    }
+}
diff --git a/forester/java/src/org/forester/util/ThreadedStreamHandler.java b/forester/java/src/org/forester/util/ThreadedStreamHandler.java
new file mode 100644 (file)
index 0000000..ed70e75
--- /dev/null
@@ -0,0 +1,135 @@
+// $Id:
+/**
+ * This class is intended to be used with the SystemCommandExecutor class to let
+ * users execute system commands from Java applications.
+ * 
+ * This class is based on work that was shared in a JavaWorld article named
+ * "When System.exec() won't". That article is available at this url:
+ * 
+ * http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html
+ * 
+ * Documentation for this class is available at this URL:
+ * 
+ * http://devdaily.com/java/java-processbuilder-process-system-exec
+ * 
+ * 
+ * Copyright 2010 alvin j. alexander, devdaily.com.
+ * 
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ * 
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ * 
+ * Please see the following page for the LGPL license:
+ * http://www.gnu.org/licenses/lgpl.txt
+ * 
+ */
+
+package org.forester.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+
+class ThreadedStreamHandler extends Thread {
+
+    InputStream     inputStream;
+    String          adminPassword;
+    OutputStream    outputStream;
+    PrintWriter     printWriter;
+    StringBuilder   outputBuffer    = new StringBuilder( 65536 );
+    private boolean sudoIsRequested = false;
+
+    /**
+     * A simple constructor for when the sudo command is not necessary.
+     * This constructor will just run the command you provide, without
+     * running sudo before the command, and without expecting a password.
+     * 
+     * @param inputStream
+     */
+    ThreadedStreamHandler( final InputStream inputStream ) {
+        this.inputStream = inputStream;
+    }
+
+    /**
+     * Use this constructor when you want to invoke the 'sudo' command.
+     * The outputStream must not be null; a null stream causes a NullPointerException in this constructor.
+     * 
+     * TODO this currently hangs if the admin password given for the sudo command is wrong.
+     * 
+     * @param inputStream
+     * @param outputStream
+     * @param adminPassword
+     */
+    ThreadedStreamHandler( final InputStream inputStream, final OutputStream outputStream, final String adminPassword ) {
+        this.inputStream = inputStream;
+        this.outputStream = outputStream;
+        printWriter = new PrintWriter( outputStream );
+        this.adminPassword = adminPassword;
+        sudoIsRequested = true;
+    }
+
+    private void doSleep( final long millis ) {
+        try {
+            Thread.sleep( millis );
+        }
+        catch ( final InterruptedException e ) {
+            // ignore
+        }
+    }
+
+    public StringBuilder getOutputBuffer() {
+        return outputBuffer;
+    }
+
+    @Override
+    public void run() {
+        // On Mac OS X 10.5.x, a 'sudo' command expects the admin password to be
+        // written out immediately; that is why this code is here.
+        if ( sudoIsRequested ) {
+            //doSleep(500);
+            printWriter.println( adminPassword );
+            printWriter.flush();
+        }
+        BufferedReader bufferedReader = null;
+        final String newline = ForesterUtil.LINE_SEPARATOR;
+        try {
+            bufferedReader = new BufferedReader( new InputStreamReader( inputStream ) );
+            String line = null;
+            while ( ( line = bufferedReader.readLine() ) != null ) {
+                // outputBuffer.append( line + "\n" ); // CMZ change
+                outputBuffer.append( line );
+                outputBuffer.append( newline );
+            }
+        }
+        catch ( final IOException ioe ) {
+            // TODO handle this better
+            ioe.printStackTrace();
+        }
+        catch ( final Throwable t ) {
+            // TODO handle this better
+            t.printStackTrace();
+        }
+        finally {
+            try {
+                bufferedReader.close();
+            }
+            catch ( final IOException e ) {
+                // ignore this one
+            }
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/util/WindowsUtils.java b/forester/java/src/org/forester/util/WindowsUtils.java
new file mode 100644 (file)
index 0000000..b62906d
--- /dev/null
@@ -0,0 +1,87 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// From: http://www.rgagnon.com/javadetails/java-0652.html
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
+
+package org.forester.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+
+public class WindowsUtils {
+
+    private static final String REGQUERY_UTIL      = "reg query ";
+    private static final String REGSTR_TOKEN       = "REG_SZ";
+    private static final String DESKTOP_FOLDER_CMD = REGQUERY_UTIL
+                                                           + "\"HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\"
+                                                           + "Explorer\\Shell Folders\" /v DESKTOP";
+
+    private WindowsUtils() {
+    }
+
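+    // Runs 'reg query' on the current user's "Shell Folders" registry key and
+    // returns the text that follows the REG_SZ token, e.g. (illustrative)
+    // "    DESKTOP    REG_SZ    C:\Users\<name>\Desktop" yields
+    // "C:\Users\<name>\Desktop"; returns null if the query fails or the token
+    // is not found.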
+    public static String getCurrentUserDesktopPath() {
+        try {
+            final Process process = Runtime.getRuntime().exec( DESKTOP_FOLDER_CMD );
+            final StreamReader reader = new StreamReader( process.getInputStream() );
+            reader.start();
+            process.waitFor();
+            reader.join();
+            final String result = reader.getResult();
+            final int p = result.indexOf( REGSTR_TOKEN );
+            if ( p == -1 ) {
+                return null;
+            }
+            return result.substring( p + REGSTR_TOKEN.length() ).trim();
+        }
+        catch ( final Exception e ) {
+            return null;
+        }
+    }
+
+    static class StreamReader extends Thread {
+
+        private final InputStream  is;
+        private final StringWriter sw;
+
+        StreamReader( final InputStream is ) {
+            this.is = is;
+            sw = new StringWriter();
+        }
+
+        String getResult() {
+            return sw.toString();
+        }
+
+        @Override
+        public void run() {
+            try {
+                int c;
+                while ( ( c = is.read() ) != -1 ) {
+                    sw.write( c );
+                }
+            }
+            catch ( final IOException e ) {
+                // Do nothing
+            }
+        }
+    }
+}