X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftest%2FTest.java;h=0dd12a5987c29039c2980c0572e29ad2d8e395a2;hb=af5e25d369f3240ce41018957383044c12126d98;hp=e7aa418afe69e580803288c5183667ed8c98971b;hpb=b59302eba1039cb46f634ffe77cebc1b5c9bbcbf;p=jalview.git diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index e7aa418..0dd12a5 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -2,8 +2,8 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// Copyright (C) 2014 Christian M. Zmasek +// Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or @@ -20,7 +20,6 @@ // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // -// Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test; @@ -29,6 +28,8 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.StringWriter; +import java.io.Writer; import java.net.URL; import java.util.ArrayList; import java.util.Date; @@ -40,7 +41,9 @@ import java.util.Set; import java.util.SortedSet; import org.forester.application.support_transfer; +import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.TreePanelUtil; +import org.forester.archaeopteryx.webservices.WebserviceUtil; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; @@ -50,7 +53,6 @@ import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.GeneralMsaParser; import org.forester.io.parsers.HmmscanPerDomainTableParser; import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF; -import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusBinaryStatesMatrixParser; import org.forester.io.parsers.nexus.NexusCharactersParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; @@ -62,8 +64,10 @@ import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.io.writers.SequenceWriter; import org.forester.msa.BasicMsa; +import org.forester.msa.DeleteableMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; +import org.forester.msa.Msa.MSA_FORMAT; import org.forester.msa.MsaInferrer; import org.forester.msa.MsaMethods; import org.forester.pccx.TestPccx; @@ -102,7 +106,7 @@ import org.forester.sdi.SDI; import org.forester.sdi.SDIR; import org.forester.sdi.TestGSDI; import org.forester.sequence.BasicSequence; -import org.forester.sequence.Sequence; +import org.forester.sequence.MolecularSequence; import org.forester.species.BasicSpecies; import org.forester.species.Species; import org.forester.surfacing.TestSurfacing; @@ -129,301 +133,28 @@ import org.forester.ws.wabi.TxSearch.TAX_RANK; @SuppressWarnings( "unused") public final class Test { - private final static boolean PERFORM_DB_TESTS = false; - private final static double ZERO_DIFF = 1.0E-9; - private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) - + ForesterUtil.getFileSeparator() + "test_data" - + ForesterUtil.getFileSeparator(); private final static String PATH_TO_RESOURCES = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "resources" + ForesterUtil.getFileSeparator(); - private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; - private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" + private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + + ForesterUtil.getFileSeparator() + "test_data" + + ForesterUtil.getFileSeparator(); + private final static boolean PERFORM_DB_TESTS = true; + private static final boolean PERFORM_WEB_TREE_ACCESS = true; + private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; - private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; + private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; + private final static double ZERO_DIFF = 1.0E-9; - public static boolean testOverlapRemoval() { - try { - final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); - final List covered = new ArrayList(); - covered.add( true ); // 0 - covered.add( false ); // 1 - covered.add( true ); // 2 - covered.add( false ); // 3 - covered.add( true ); // 4 - covered.add( true ); // 5 - covered.add( false ); // 6 - covered.add( true ); // 7 - covered.add( true ); // 8 - if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { - return false; - } - if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) { - return false; - } - if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { - return false; - } - if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { - return false; - } - if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) { - return false; - } - final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 1, -1 ); - final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, -1 ); - final Protein ab = new BasicProtein( "ab", "varanus", 0 ); - ab.addProteinDomain( a ); - ab.addProteinDomain( b ); - final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); - if ( ab.getNumberOfProteinDomains() != 2 ) { - return false; - } - if ( ab_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "b" ) ) { - return false; - } - final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); - if ( ab.getNumberOfProteinDomains() != 2 ) { - return false; - } - if ( ab_s1.getNumberOfProteinDomains() != 2 ) { - return false; - } - final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); - final Domain d = new BasicDomain( "d", - ( short ) 10000, - ( short ) 10500, - ( short ) 1, - ( short ) 1, - 0.0000001, - 1 ); - final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 ); - final Protein cde = new BasicProtein( "cde", "varanus", 0 ); - cde.addProteinDomain( c ); - cde.addProteinDomain( d ); - cde.addProteinDomain( e ); - final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); - if ( cde.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( cde_s0.getNumberOfProteinDomains() != 3 ) { - return false; - } - final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); - final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); - final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); - final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); - final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); - final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); - fghi.addProteinDomain( f ); - fghi.addProteinDomain( g ); - fghi.addProteinDomain( h ); - fghi.addProteinDomain( i ); - fghi.addProteinDomain( i ); - fghi.addProteinDomain( i ); - fghi.addProteinDomain( i2 ); - final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); - if ( fghi.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { - return false; - } - final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); - if ( fghi.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { - return false; - } - final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); - final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); - final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); - final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); - final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); - final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); - final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); - final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); - jklm.addProteinDomain( j ); - jklm.addProteinDomain( k ); - jklm.addProteinDomain( l ); - jklm.addProteinDomain( m ); - jklm.addProteinDomain( m0 ); - jklm.addProteinDomain( m1 ); - jklm.addProteinDomain( m2 ); - final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); - if ( jklm.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { - return false; - } - final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); - if ( jklm.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { - return false; - } - final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); - final Protein od = new BasicProtein( "od", "varanus", 0 ); - od.addProteinDomain( only ); - final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); - if ( od.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( od_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - public static boolean testEngulfingOverlapRemoval() { - try { - final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); - final List covered = new ArrayList(); - covered.add( true ); // 0 - covered.add( false ); // 1 - covered.add( true ); // 2 - covered.add( false ); // 3 - covered.add( true ); // 4 - covered.add( true ); // 5 - covered.add( false ); // 6 - covered.add( true ); // 7 - covered.add( true ); // 8 - if ( ForesterUtil.isEngulfed( d0, covered ) ) { - return false; - } - if ( ForesterUtil.isEngulfed( d1, covered ) ) { - return false; - } - if ( ForesterUtil.isEngulfed( d2, covered ) ) { - return false; - } - if ( !ForesterUtil.isEngulfed( d3, covered ) ) { - return false; - } - if ( ForesterUtil.isEngulfed( d4, covered ) ) { - return false; - } - if ( ForesterUtil.isEngulfed( d5, covered ) ) { - return false; - } - if ( !ForesterUtil.isEngulfed( d6, covered ) ) { - return false; - } - final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); - final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); - final Protein abc = new BasicProtein( "abc", "nemve", 0 ); - abc.addProteinDomain( a ); - abc.addProteinDomain( b ); - abc.addProteinDomain( c ); - final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc ); - final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc ); - if ( abc.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( abc_r1.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( abc_r2.getNumberOfProteinDomains() != 2 ) { - return false; - } - if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { - return false; - } - if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { - return false; - } - final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); - final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); - final Protein def = new BasicProtein( "def", "nemve", 0 ); - def.addProteinDomain( d ); - def.addProteinDomain( e ); - def.addProteinDomain( f ); - final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def ); - final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def ); - if ( def.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( def_r1.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( def_r2.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { - return false; - } - if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { - return false; - } - if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - public static boolean isEqual( final double a, final double b ) { - return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF ); + public static boolean isEqual( final double a, final double b ) { + return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF ); } public static void main( final String[] args ) { - try { - String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; - final URL u = new URL( s ); - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser parser = ParserUtils.createParserDependingOnUrlContents( u, true ); - final Phylogeny[] phys = factory.create( u.openStream(), parser ); - System.out.println( "results 1:" ); - for( final Phylogeny phy : phys ) { - System.out.println( phy.toString() ); - } - System.out.println( "" ); - final Phylogeny[] phys3 = factory.create( "((a,b),c)", parser ); - System.out.println( "results 3:" ); - for( final Phylogeny phy : phys3 ) { - System.out.println( phy.toString() ); - } - } - catch ( Exception e ) { - e.printStackTrace(); - } - System.exit( 0 ); System.out.println( "[Java version: " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); System.out.println( "[OS: " + ForesterUtil.OS_NAME + " " + ForesterUtil.OS_ARCH + " " + ForesterUtil.OS_VERSION + "]" ); @@ -450,6 +181,15 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); + System.out.print( "MSA entropy: " ); + if ( Test.testMsaEntropy() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Basic node methods: " ); if ( Test.testBasicNodeMethods() ) { System.out.println( "OK." ); @@ -531,31 +271,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - if ( PERFORM_DB_TESTS ) { - System.out.print( "Ebi Entry Retrieval: " ); - if ( Test.testEbiEntryRetrieval() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - } - // System.exit( 0 ); - if ( PERFORM_DB_TESTS ) { - System.out.print( "Sequence DB tools 2: " ); - if ( testSequenceDbWsTools2() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - System.exit( -1 ); - } - } - // System.exit( 0 ); System.out.print( "Hmmscan output parser: " ); if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); @@ -565,7 +280,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - // System.out.print( "Overlap removal: " ); if ( !org.forester.test.Test.testOverlapRemoval() ) { System.out.println( "failed." ); @@ -584,7 +298,15 @@ public final class Test { succeeded++; } System.out.println( "OK." ); - // + System.out.print( "Taxonomy data extraction: " ); + if ( Test.testExtractTaxonomyDataFromNodeName() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Taxonomy code extraction: " ); if ( Test.testExtractTaxonomyCodeFromNodeName() ) { System.out.println( "OK." ); @@ -1097,464 +819,1393 @@ public final class Test { System.out.println( "OK." ); succeeded++; } - else { - System.out.println( "failed." ); - failed++; + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "General table: " ); + if ( Test.testGeneralTable() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Amino acid sequence: " ); + if ( Test.testAminoAcidSequence() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "General MSA parser: " ); + if ( Test.testGeneralMsaParser() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Fasta parser for msa: " ); + if ( Test.testFastaParser() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Creation of balanced phylogeny: " ); + if ( Test.testCreateBalancedPhylogeny() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Genbank accessor parsing: " ); + if ( Test.testGenbankAccessorParsing() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + String path = ""; + final String os = ForesterUtil.OS_NAME.toLowerCase(); + if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) { + path = "/usr/local/bin/mafft"; + } + else if ( os.indexOf( "win" ) >= 0 ) { + path = "C:\\Program Files\\mafft-win\\mafft.bat"; + } + else { + path = "mafft"; + if ( !MsaInferrer.isInstalled( path ) ) { + path = "/usr/bin/mafft"; + } + if ( !MsaInferrer.isInstalled( path ) ) { + path = "/usr/local/bin/mafft"; + } + } + if ( MsaInferrer.isInstalled( path ) ) { + System.out.print( "MAFFT (external program): " ); + if ( Test.testMafft( path ) ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed [will not count towards failed tests]" ); + } + } + System.out.print( "Next nodes with collapsed: " ); + if ( Test.testNextNodeWithCollapsing() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Simple MSA quality: " ); + if ( Test.testMsaQualityMethod() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Deleteable MSA: " ); + if ( Test.testDeleteableMsa() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + if ( PERFORM_DB_TESTS ) { + System.out.print( "Uniprot Entry Retrieval: " ); + if ( Test.testUniprotEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Ebi Entry Retrieval: " ); + if ( Test.testEbiEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Sequence DB tools 2: " ); + if ( testSequenceDbWsTools2() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + System.exit( -1 ); + } + System.out.print( "Uniprot Taxonomy Search: " ); + if ( Test.testUniprotTaxonomySearch() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + } + if ( PERFORM_WEB_TREE_ACCESS ) { + System.out.print( "NHX parsing from URL: " ); + if ( Test.testNHXparsingFromURL() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "NHX parsing from URL 2: " ); + if ( Test.testNHXparsingFromURL2() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "phyloXML parsing from URL: " ); + if ( Test.testPhyloXMLparsingFromURL() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "TreeBase acccess: " ); + if ( Test.testTreeBaseReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + // + System.out.print( "ToL access: " ); + if ( Test.testToLReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + // + System.out.print( "TreeFam access: " ); + if ( Test.testTreeFamReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + // + // + System.out.print( "Pfam tree access: " ); + if ( Test.testPfamTreeReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + } + System.out.println(); + final Runtime rt = java.lang.Runtime.getRuntime(); + final long free_memory = rt.freeMemory() / 1000000; + final long total_memory = rt.totalMemory() / 1000000; + System.out.println( "Running time : " + ( new Date().getTime() - start_time ) + "ms " + "(free memory: " + + free_memory + "MB, total memory: " + total_memory + "MB)" ); + System.out.println(); + System.out.println( "Successful tests: " + succeeded ); + System.out.println( "Failed tests: " + failed ); + System.out.println(); + if ( failed < 1 ) { + System.out.println( "OK." ); + } + else { + System.out.println( "Not OK." ); + } + } + + public static boolean testEngulfingOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.isEngulfed( d0, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d1, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d2, covered ) ) { + return false; + } + if ( !ForesterUtil.isEngulfed( d3, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d4, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d5, covered ) ) { + return false; + } + if ( !ForesterUtil.isEngulfed( d6, covered ) ) { + return false; + } + final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Protein abc = new BasicProtein( "abc", "nemve", 0 ); + abc.addProteinDomain( a ); + abc.addProteinDomain( b ); + abc.addProteinDomain( c ); + final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc ); + final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc ); + if ( abc.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( abc_r1.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( abc_r2.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { + return false; + } + if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { + return false; + } + final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Protein def = new BasicProtein( "def", "nemve", 0 ); + def.addProteinDomain( d ); + def.addProteinDomain( e ); + def.addProteinDomain( f ); + final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def ); + final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def ); + if ( def.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( def_r1.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( def_r2.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { + return false; + } + if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { + return false; + } + if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + public static final boolean testNHXparsingFromURL2() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; + final Phylogeny phys[] = AptxUtil.readPhylogeniesFromUrl( new URL( s ), + false, + false, + false, + TAXONOMY_EXTRACTION.NO, + false ); + if ( ( phys == null ) || ( phys.length != 5 ) ) { + return false; + } + if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys[ 1 ].toNewHampshire() ); + return false; + } + final Phylogeny phys2[] = AptxUtil.readPhylogeniesFromUrl( new URL( s ), + false, + false, + false, + TAXONOMY_EXTRACTION.NO, + false ); + if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { + return false; + } + if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys2[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys2[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys2[ 1 ].toNewHampshire() ); + return false; + } + final Phylogeny phys3[] = AptxUtil.readPhylogeniesFromUrl( new URL( "http://swisstree.vital-it.ch:80/" + + "SwissTree/ST001/consensus_tree.nhx" ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); + if ( ( phys3 == null ) || ( phys3.length != 1 ) ) { + return false; + } + if ( !phys3[ 0 ] + .toNewHampshire() + .equals( "((((POP23a_CIOIN_ENSCING00000016202,POP23b_CIOIN_ENSCING00000016169),POP23_CIOSA_ENSCSAVG00000000248),((POP23a_BRAFL_C3ZMF1,POP23b_BRAFL_121417),(((POP3_ORYLA_ENSORLG00000019669,POP3_GASAC_ENSGACG00000014023,POP3_DANRE_Q6JWW1),(POP3_XENTR_B1H1F6,(POP3_CHICK_Q9DG25,(POP3_ORNAN_ENSOANG00000004179,POP3_MONDO_ENSMODG00000018033,((POP3_MOUSE_Q9ES81,POP3_RAT_Q3BCU3),POP3_RABIT_ENSOCUG00000025973,POP3_MACMU_ENSMMUG00000014473,POP3_HUMAN_Q9HBV1))))),(((POP2_GASAC_ENSGACG00000001420,POP2_ORYLA_ENSORLG00000008627,POP2_TAKRU_ENSTRUG00000015933),POP2_DANRE_ENSDARG00000069922),POP2_XENTR_ENSXETG00000018064,(((POP2_TAEGU_ENSTGUG00000013383,POP2_CHICK_Q6T9Z5),POP2_ANOCA_ENSACAG00000003557),((POP2_MACEU_ENSMEUG00000015825,POP2_MONDO_ENSMODG00000018205),((POP2_RABIT_ENSOCUG00000009515,(POP2_RAT_Q6P722,POP2_MOUSE_Q9ES82)),(POP2_MACMU_ENSMMUG00000000905,POP2_HUMAN_Q9HBU9)))))))),((POP1_CIOSA_ENSCSAVG00000000247,POP1_CIOIN_ENSCING00000000496),((POP1_DANRE_Q5PQZ7,(POP1_ORYLA_ENSORLG00000019663,POP1_GASAC_ENSGACG00000014015,POP1_TAKRU_ENSORLG00000019663)),(POP1_XENTR_B1H1G2,(POP1_ANOCA_ENSACAG00000003910,(POP1_TAEGU_ENSTGUG00000012218,POP1_CHICK_Q9DG23)),POP1_ORNAN_ENSOANG00000004180,POP1_MONDO_ENSMODG00000018034,(POP1_RABIT_ENSOCUG00000016944,(POP1_RAT_Q3BCU4,POP1_MOUSE_Q9ES83),(POP1_HUMAN_Q8NE79,POP1_MACMU_ENSMMUG00000014471))))));" ) ) { + System.out.println( phys3[ 0 ].toNewHampshire() ); + return false; + } + final Phylogeny phys4[] = AptxUtil.readPhylogeniesFromUrl( new URL( "http://swisstree.vital-it.ch:80/" + + "SwissTree/ST001/consensus_tree.nhx" ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); + if ( ( phys4 == null ) || ( phys4.length != 1 ) ) { + return false; + } + if ( !phys4[ 0 ] + .toNewHampshire() + .equals( "((((POP23a_CIOIN_ENSCING00000016202,POP23b_CIOIN_ENSCING00000016169),POP23_CIOSA_ENSCSAVG00000000248),((POP23a_BRAFL_C3ZMF1,POP23b_BRAFL_121417),(((POP3_ORYLA_ENSORLG00000019669,POP3_GASAC_ENSGACG00000014023,POP3_DANRE_Q6JWW1),(POP3_XENTR_B1H1F6,(POP3_CHICK_Q9DG25,(POP3_ORNAN_ENSOANG00000004179,POP3_MONDO_ENSMODG00000018033,((POP3_MOUSE_Q9ES81,POP3_RAT_Q3BCU3),POP3_RABIT_ENSOCUG00000025973,POP3_MACMU_ENSMMUG00000014473,POP3_HUMAN_Q9HBV1))))),(((POP2_GASAC_ENSGACG00000001420,POP2_ORYLA_ENSORLG00000008627,POP2_TAKRU_ENSTRUG00000015933),POP2_DANRE_ENSDARG00000069922),POP2_XENTR_ENSXETG00000018064,(((POP2_TAEGU_ENSTGUG00000013383,POP2_CHICK_Q6T9Z5),POP2_ANOCA_ENSACAG00000003557),((POP2_MACEU_ENSMEUG00000015825,POP2_MONDO_ENSMODG00000018205),((POP2_RABIT_ENSOCUG00000009515,(POP2_RAT_Q6P722,POP2_MOUSE_Q9ES82)),(POP2_MACMU_ENSMMUG00000000905,POP2_HUMAN_Q9HBU9)))))))),((POP1_CIOSA_ENSCSAVG00000000247,POP1_CIOIN_ENSCING00000000496),((POP1_DANRE_Q5PQZ7,(POP1_ORYLA_ENSORLG00000019663,POP1_GASAC_ENSGACG00000014015,POP1_TAKRU_ENSORLG00000019663)),(POP1_XENTR_B1H1G2,(POP1_ANOCA_ENSACAG00000003910,(POP1_TAEGU_ENSTGUG00000012218,POP1_CHICK_Q9DG23)),POP1_ORNAN_ENSOANG00000004180,POP1_MONDO_ENSMODG00000018034,(POP1_RABIT_ENSOCUG00000016944,(POP1_RAT_Q3BCU4,POP1_MOUSE_Q9ES83),(POP1_HUMAN_Q8NE79,POP1_MACMU_ENSMMUG00000014471))))));" ) ) { + System.out.println( phys4[ 0 ].toNewHampshire() ); + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + } + return true; + } + + public static final boolean testNHXparsingFromURL() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; + final URL u = new URL( s ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u, new NHXParser() ); + if ( ( phys == null ) || ( phys.length != 5 ) ) { + return false; + } + if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys[ 1 ].toNewHampshire() ); + return false; + } + final Phylogeny[] phys2 = factory.create( u.openStream(), new NHXParser() ); + if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { + return false; + } + if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys2[ 0 ].toNewHampshire() ); + return false; + } + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final NHXParser p = new NHXParser(); + final URL u2 = new URL( s ); + p.setSource( u2 ); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.hasNext() ) { + return false; + } + p.reset(); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + return false; + } + p.reset(); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + } + return true; + } + + public static boolean testOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) { + return false; + } + final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 1, -1 ); + final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, -1 ); + final Protein ab = new BasicProtein( "ab", "varanus", 0 ); + ab.addProteinDomain( a ); + ab.addProteinDomain( b ); + final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "b" ) ) { + return false; + } + final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s1.getNumberOfProteinDomains() != 2 ) { + return false; + } + final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain d = new BasicDomain( "d", + ( short ) 10000, + ( short ) 10500, + ( short ) 1, + ( short ) 1, + 0.0000001, + 1 ); + final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Protein cde = new BasicProtein( "cde", "varanus", 0 ); + cde.addProteinDomain( c ); + cde.addProteinDomain( d ); + cde.addProteinDomain( e ); + final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); + if ( cde.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( cde_s0.getNumberOfProteinDomains() != 3 ) { + return false; + } + final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); + final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); + final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); + fghi.addProteinDomain( f ); + fghi.addProteinDomain( g ); + fghi.addProteinDomain( h ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i2 ); + final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { + return false; + } + final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); + final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); + final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); + final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); + jklm.addProteinDomain( j ); + jklm.addProteinDomain( k ); + jklm.addProteinDomain( l ); + jklm.addProteinDomain( m ); + jklm.addProteinDomain( m0 ); + jklm.addProteinDomain( m1 ); + jklm.addProteinDomain( m2 ); + final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { + return false; + } + final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein od = new BasicProtein( "od", "varanus", 0 ); + od.addProteinDomain( only ); + final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); + if ( od.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( od_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; } - System.out.print( "General table: " ); - if ( Test.testGeneralTable() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + public static final boolean testPfamTreeReading() { + try { + final URL u = new URL( WebserviceUtil.PFAM_SERVER + "/family/PF" + "01849" + "/tree/download" ); + final NHXParser parser = new NHXParser(); + parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + parser.setReplaceUnderscores( false ); + parser.setGuessRootedness( true ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u.openStream(), parser ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + if ( phys[ 0 ].getNumberOfExternalNodes() < 10 ) { + return false; + } } - else { - System.out.println( "failed." ); - failed++; + catch ( final Exception e ) { + e.printStackTrace(); } - System.out.print( "Amino acid sequence: " ); - if ( Test.testAminoAcidSequence() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + public static final boolean testPhyloXMLparsingFromURL() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/archaeopteryx_a/apaf_bcl2.xml"; + final URL u = new URL( s ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u.openStream(), PhyloXmlParser.createPhyloXmlParser() ); + if ( ( phys == null ) || ( phys.length != 2 ) ) { + return false; + } } - else { - System.out.println( "failed." ); - failed++; + catch ( final Exception e ) { + e.printStackTrace(); } - System.out.print( "General MSA parser: " ); - if ( Test.testGeneralMsaParser() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + public static final boolean testToLReading() { + try { + final URL u = new URL( WebserviceUtil.TOL_URL_BASE + "15079" ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u.openStream(), new TolParser() ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + if ( !phys[ 0 ].getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "15079" ) ) { + return false; + } + if ( !phys[ 0 ].getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Protacanthopterygii" ) ) { + return false; + } + if ( phys[ 0 ].getNumberOfExternalNodes() < 5 ) { + return false; + } } - else { - System.out.println( "failed." ); - failed++; + catch ( final Exception e ) { + e.printStackTrace(); } - System.out.print( "Fasta parser for msa: " ); - if ( Test.testFastaParser() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + public static final boolean testTreeBaseReading() { + try { + final URL u = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "825?format=nexus" ); + final NexusPhylogeniesParser parser = new NexusPhylogeniesParser(); + parser.setReplaceUnderscores( true ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u.openStream(), parser ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + final URL u2 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + "15613?format=nexus" ); + final NexusPhylogeniesParser parser2 = new NexusPhylogeniesParser(); + parser2.setReplaceUnderscores( true ); + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys2 = factory2.create( u2.openStream(), parser2 ); + if ( ( phys2 == null ) || ( phys2.length != 9 ) ) { + return false; + } } - else { - System.out.println( "failed." ); - failed++; + catch ( final Exception e ) { + e.printStackTrace(); } - System.out.print( "Creation of balanced phylogeny: " ); - if ( Test.testCreateBalancedPhylogeny() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + public static final boolean testTreeFamReading() { + try { + final URL u = new URL( WebserviceUtil.TREE_FAM_URL_BASE + "101004" + "/tree/newick" ); + final NHXParser parser = new NHXParser(); + parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); + parser.setReplaceUnderscores( false ); + parser.setGuessRootedness( true ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u.openStream(), parser ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + if ( phys[ 0 ].getNumberOfExternalNodes() < 10 ) { + return false; + } } - else { - System.out.println( "failed." ); - failed++; + catch ( final Exception e ) { + e.printStackTrace(); } - System.out.print( "Genbank accessor parsing: " ); - if ( Test.testGenbankAccessorParsing() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { + final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; + return p; + } + + private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { + return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); + } + + private static boolean testAminoAcidSequence() { + try { + final MolecularSequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" ); + if ( aa1.getLength() != 13 ) { + return false; + } + if ( aa1.getResidueAt( 0 ) != 'A' ) { + return false; + } + if ( aa1.getResidueAt( 2 ) != 'K' ) { + return false; + } + if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) { + return false; + } + final MolecularSequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" ); + if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZOXU" ) ) { + return false; + } + final MolecularSequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" ); + if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) { + return false; + } + final MolecularSequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" ); + if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) { + return false; + } } - else { - System.out.println( "failed." ); - failed++; + catch ( final Exception e ) { + e.printStackTrace(); + return false; } - if ( PERFORM_DB_TESTS ) { - System.out.print( "Uniprot Entry Retrieval: " ); - if ( Test.testUniprotEntryRetrieval() ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + private static boolean testBasicDomain() { + try { + final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); + if ( !pd.getDomainId().equals( "id" ) ) { + return false; } - else { - System.out.println( "failed." ); - failed++; + if ( pd.getNumber() != 1 ) { + return false; + } + if ( pd.getTotalCount() != 4 ) { + return false; + } + if ( !pd.equals( new BasicDomain( "id", 22, 111, ( short ) 1, ( short ) 4, 0.2, -12 ) ) ) { + return false; + } + final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); + final BasicDomain a1_copy = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); + final BasicDomain a1_equal = new BasicDomain( "a", 524, 743994, ( short ) 1, ( short ) 300, 3.0005, 230 ); + final BasicDomain a2 = new BasicDomain( "a", 1, 10, ( short ) 2, ( short ) 4, 0.1, -12 ); + final BasicDomain a3 = new BasicDomain( "A", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); + if ( !a1.equals( a1 ) ) { + return false; + } + if ( !a1.equals( a1_copy ) ) { + return false; + } + if ( !a1.equals( a1_equal ) ) { + return false; + } + if ( !a1.equals( a2 ) ) { + return false; + } + if ( a1.equals( a3 ) ) { + return false; + } + if ( a1.compareTo( a1 ) != 0 ) { + return false; + } + if ( a1.compareTo( a1_copy ) != 0 ) { + return false; + } + if ( a1.compareTo( a1_equal ) != 0 ) { + return false; } - } - if ( PERFORM_DB_TESTS ) { - System.out.print( "Uniprot Taxonomy Search: " ); - if ( Test.testUniprotTaxonomySearch() ) { - System.out.println( "OK." ); - succeeded++; + if ( a1.compareTo( a2 ) != 0 ) { + return false; } - else { - System.out.println( "failed." ); - failed++; + if ( a1.compareTo( a3 ) == 0 ) { + return false; } } - //---- - String path = ""; - final String os = ForesterUtil.OS_NAME.toLowerCase(); - if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) { - path = "/usr/local/bin/mafft"; - } - else if ( os.indexOf( "win" ) >= 0 ) { - path = "C:\\Program Files\\mafft-win\\mafft.bat"; - } - else { - path = "/home/czmasek/bin/mafft"; - } - if ( !MsaInferrer.isInstalled( path ) ) { - path = "mafft"; - } - if ( !MsaInferrer.isInstalled( path ) ) { - path = "/usr/local/bin/mafft"; + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; } - if ( MsaInferrer.isInstalled( path ) ) { - System.out.print( "MAFFT (external program): " ); - if ( Test.testMafft( path ) ) { - System.out.println( "OK." ); - succeeded++; + return true; + } + + private static boolean testBasicNodeMethods() { + try { + if ( PhylogenyNode.getNodeCount() != 0 ) { + return false; } - else { - System.out.println( "failed [will not count towards failed tests]" ); + final PhylogenyNode n1 = new PhylogenyNode(); + final PhylogenyNode n2 = PhylogenyNode + .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + final PhylogenyNode n3 = PhylogenyNode + .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + final PhylogenyNode n4 = PhylogenyNode + .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + if ( n1.isHasAssignedEvent() ) { + return false; + } + if ( PhylogenyNode.getNodeCount() != 4 ) { + return false; + } + if ( n3.getIndicator() != 0 ) { + return false; + } + if ( n3.getNumberOfExternalNodes() != 1 ) { + return false; + } + if ( !n3.isExternal() ) { + return false; + } + if ( !n3.isRoot() ) { + return false; + } + if ( !n4.getName().equals( "n4" ) ) { + return false; } } - //---- - System.out.print( "Next nodes with collapsed: " ); - if ( Test.testNextNodeWithCollapsing() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.out.print( "Simple MSA quality: " ); - if ( Test.testMsaQualityMethod() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.out.println(); - final Runtime rt = java.lang.Runtime.getRuntime(); - final long free_memory = rt.freeMemory() / 1000000; - final long total_memory = rt.totalMemory() / 1000000; - System.out.println( "Running time : " + ( new Date().getTime() - start_time ) + "ms " + "(free memory: " - + free_memory + "MB, total memory: " + total_memory + "MB)" ); - System.out.println(); - System.out.println( "Successful tests: " + succeeded ); - System.out.println( "Failed tests: " + failed ); - System.out.println(); - if ( failed < 1 ) { - System.out.println( "OK." ); - } - else { - System.out.println( "Not OK." ); + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; } + return true; } - private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { - final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; - return p; - } - - private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { - return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); - } - - private static boolean testAminoAcidSequence() { + private static boolean testBasicPhyloXMLparsing() { try { - final Sequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" ); - if ( aa1.getLength() != 13 ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); + final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", + xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); + return false; + } + if ( phylogenies_0.length != 4 ) { + return false; + } + final Phylogeny t1 = phylogenies_0[ 0 ]; + final Phylogeny t2 = phylogenies_0[ 1 ]; + final Phylogeny t3 = phylogenies_0[ 2 ]; + final Phylogeny t4 = phylogenies_0[ 3 ]; + if ( t1.getNumberOfExternalNodes() != 1 ) { + return false; + } + if ( !t1.isRooted() ) { + return false; + } + if ( t1.isRerootable() ) { + return false; + } + if ( !t1.getType().equals( "gene_tree" ) ) { + return false; + } + if ( t2.getNumberOfExternalNodes() != 2 ) { + return false; + } + if ( !isEqual( t2.getNode( "node a" ).getDistanceToParent(), 1.0 ) ) { + return false; + } + if ( !isEqual( t2.getNode( "node b" ).getDistanceToParent(), 2.0 ) ) { + return false; + } + if ( t2.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) { + return false; + } + if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) { + return false; + } + if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) { + return false; + } + if ( t2.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) { + return false; + } + if ( !t2.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence() + .startsWith( "actgtgggggt" ) ) { + return false; + } + if ( !t2.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence() + .startsWith( "ctgtgatgcat" ) ) { + return false; + } + if ( t3.getNumberOfExternalNodes() != 4 ) { + return false; + } + if ( !t1.getName().equals( "t1" ) ) { + return false; + } + if ( !t2.getName().equals( "t2" ) ) { + return false; + } + if ( !t3.getName().equals( "t3" ) ) { + return false; + } + if ( !t4.getName().equals( "t4" ) ) { + return false; + } + if ( !t3.getIdentifier().getValue().equals( "1-1" ) ) { + return false; + } + if ( !t3.getIdentifier().getProvider().equals( "treebank" ) ) { + return false; + } + if ( !t3.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { + return false; + } + if ( !t3.getNode( "root node" ).getNodeData().getSequence().getName() + .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) { + return false; + } + if ( !t3.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { + return false; + } + if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { + return false; + } + if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource().equals( "UniProtKB" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() + .equals( "apoptosis" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getRef() + .equals( "GO:0006915" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getSource() + .equals( "UniProtKB" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getEvidence() + .equals( "experimental" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getType() + .equals( "function" ) ) { + return false; + } + if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() + .getValue() != 1 ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() + .getType().equals( "ml" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() + .equals( "apoptosis" ) ) { + return false; + } + if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { return false; } - if ( aa1.getResidueAt( 0 ) != 'A' ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { return false; } - if ( aa1.getResidueAt( 2 ) != 'K' ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { return false; } - if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() + .equals( "GO:0005829" ) ) { return false; } - final Sequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" ); - if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZXXU" ) ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getDesc() + .equals( "intracellular organelle" ) ) { return false; } - final Sequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" ); - if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { return false; } - final Sequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" ); - if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() + .equals( "UniProt link" ) ) ) { + return false; + } + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { + return false; + } + final SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + if ( x.size() != 4 ) { return false; } + int c = 0; + for( final Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { - e.printStackTrace(); + e.printStackTrace( System.out ); return false; } return true; } - private static boolean testBasicDomain() { + private static boolean testBasicPhyloXMLparsingRoundtrip() { try { - final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); - if ( !pd.getDomainId().equals( "id" ) ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); + if ( USE_LOCAL_PHYLOXML_SCHEMA ) { + xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); + } + else { + xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); + } + final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", + xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); return false; } - if ( pd.getNumber() != 1 ) { + if ( phylogenies_0.length != 4 ) { return false; } - if ( pd.getTotalCount() != 4 ) { + final StringBuffer t1_sb = new StringBuffer( phylogenies_0[ 0 ].toPhyloXML( 0 ) ); + final Phylogeny[] phylogenies_t1 = factory.create( t1_sb, xml_parser ); + if ( phylogenies_t1.length != 1 ) { return false; } - if ( !pd.equals( new BasicDomain( "id", 22, 111, ( short ) 1, ( short ) 4, 0.2, -12 ) ) ) { + final Phylogeny t1_rt = phylogenies_t1[ 0 ]; + if ( !t1_rt.getDistanceUnit().equals( "cc" ) ) { return false; } - final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); - final BasicDomain a1_copy = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); - final BasicDomain a1_equal = new BasicDomain( "a", 524, 743994, ( short ) 1, ( short ) 300, 3.0005, 230 ); - final BasicDomain a2 = new BasicDomain( "a", 1, 10, ( short ) 2, ( short ) 4, 0.1, -12 ); - final BasicDomain a3 = new BasicDomain( "A", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); - if ( !a1.equals( a1 ) ) { + if ( !t1_rt.isRooted() ) { return false; } - if ( !a1.equals( a1_copy ) ) { + if ( t1_rt.isRerootable() ) { return false; } - if ( !a1.equals( a1_equal ) ) { + if ( !t1_rt.getType().equals( "gene_tree" ) ) { return false; } - if ( !a1.equals( a2 ) ) { + final StringBuffer t2_sb = new StringBuffer( phylogenies_0[ 1 ].toPhyloXML( 0 ) ); + final Phylogeny[] phylogenies_t2 = factory.create( t2_sb, xml_parser ); + final Phylogeny t2_rt = phylogenies_t2[ 0 ]; + if ( t2_rt.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) { return false; } - if ( a1.equals( a3 ) ) { + if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) { return false; } - if ( a1.compareTo( a1 ) != 0 ) { + if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) { return false; } - if ( a1.compareTo( a1_copy ) != 0 ) { + if ( t2_rt.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) { return false; } - if ( a1.compareTo( a1_equal ) != 0 ) { + if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence() + .startsWith( "actgtgggggt" ) ) { return false; } - if ( a1.compareTo( a2 ) != 0 ) { + if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence() + .startsWith( "ctgtgatgcat" ) ) { return false; } - if ( a1.compareTo( a3 ) == 0 ) { + final StringBuffer t3_sb_0 = new StringBuffer( phylogenies_0[ 2 ].toPhyloXML( 0 ) ); + final Phylogeny[] phylogenies_1_0 = factory.create( t3_sb_0, xml_parser ); + final StringBuffer t3_sb = new StringBuffer( phylogenies_1_0[ 0 ].toPhyloXML( 0 ) ); + final Phylogeny[] phylogenies_1 = factory.create( t3_sb, xml_parser ); + if ( phylogenies_1.length != 1 ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testBasicNodeMethods() { - try { - if ( PhylogenyNode.getNodeCount() != 0 ) { + final Phylogeny t3_rt = phylogenies_1[ 0 ]; + if ( !t3_rt.getName().equals( "t3" ) ) { return false; } - final PhylogenyNode n1 = new PhylogenyNode(); - final PhylogenyNode n2 = PhylogenyNode - .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); - final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); - final PhylogenyNode n4 = PhylogenyNode - .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); - if ( n1.isHasAssignedEvent() ) { + if ( t3_rt.getNumberOfExternalNodes() != 4 ) { return false; } - if ( PhylogenyNode.getNodeCount() != 4 ) { + if ( !t3_rt.getIdentifier().getValue().equals( "1-1" ) ) { return false; } - if ( n3.getIndicator() != 0 ) { + if ( !t3_rt.getIdentifier().getProvider().equals( "treebank" ) ) { return false; } - if ( n3.getNumberOfExternalNodes() != 1 ) { + if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { return false; } - if ( !n3.isExternal() ) { + if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getName() + .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) { return false; } - if ( !n3.isRoot() ) { + if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { return false; } - if ( !n4.getName().equals( "n4" ) ) { + if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testBasicPhyloXMLparsing() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", - xml_parser ); - if ( xml_parser.getErrorCount() > 0 ) { - System.out.println( xml_parser.getErrorMessages().toString() ); + if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource() + .equals( "UniProtKB" ) ) { return false; } - if ( phylogenies_0.length != 4 ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() + .equals( "apoptosis" ) ) { return false; } - final Phylogeny t1 = phylogenies_0[ 0 ]; - final Phylogeny t2 = phylogenies_0[ 1 ]; - final Phylogeny t3 = phylogenies_0[ 2 ]; - final Phylogeny t4 = phylogenies_0[ 3 ]; - if ( t1.getNumberOfExternalNodes() != 1 ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getRef() + .equals( "GO:0006915" ) ) { return false; } - if ( !t1.isRooted() ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getSource() + .equals( "UniProtKB" ) ) { return false; } - if ( t1.isRerootable() ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getEvidence() + .equals( "experimental" ) ) { return false; } - if ( !t1.getType().equals( "gene_tree" ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getType() + .equals( "function" ) ) { return false; } - if ( t2.getNumberOfExternalNodes() != 2 ) { + if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() + .getValue() != 1 ) { return false; } - if ( !isEqual( t2.getNode( "node a" ).getDistanceToParent(), 1.0 ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() + .getType().equals( "ml" ) ) { return false; } - if ( !isEqual( t2.getNode( "node b" ).getDistanceToParent(), 2.0 ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() + .equals( "apoptosis" ) ) { return false; } - if ( t2.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) { + if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { return false; } - if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { return false; } - if ( !t2.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() + .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() + .equals( "GO:0005829" ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getDesc() + .equals( "intracellular organelle" ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() + .equals( "UniProt link" ) ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDoi().equals( "10.1038/387489a0" ) ) ) { + return false; + } + if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() + .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) { + return false; + } + if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) { + return false; + } + if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getScientificName().equals( "ecdysozoa" ) ) { + return false; + } + if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName().equals( "molting animals" ) ) { + return false; + } + if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { + return false; + } + if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getProvider() + .equals( "ncbi" ) ) { + return false; + } + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getTotalLength() != 124 ) { + return false; + } + if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) + .getName().equals( "B" ) ) { + return false; + } + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) + .getFrom() != 21 ) { + return false; + } + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getTo() != 44 ) { return false; } - if ( t2.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) { + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) + .getLength() != 24 ) { return false; } - if ( !t2.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence() - .startsWith( "actgtgggggt" ) ) { + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) + .getConfidence() != 2144 ) { return false; } - if ( !t2.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence() - .startsWith( "ctgtgatgcat" ) ) { + if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getId() + .equals( "pfam" ) ) { return false; } - if ( t3.getNumberOfExternalNodes() != 4 ) { + if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 3 ) { return false; } - if ( !t1.getName().equals( "t1" ) ) { + if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) { return false; } - if ( !t2.getName().equals( "t2" ) ) { + if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 1 ) { return false; } - if ( !t3.getName().equals( "t3" ) ) { + if ( !t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getType().equals( "domains" ) ) { return false; } - if ( !t4.getName().equals( "t4" ) ) { + final Taxonomy taxbb = t3_rt.getNode( "node bb" ).getNodeData().getTaxonomy(); + if ( !taxbb.getAuthority().equals( "Stephenson, 1935" ) ) { return false; } - if ( !t3.getIdentifier().getValue().equals( "1-1" ) ) { + if ( !taxbb.getCommonName().equals( "starlet sea anemone" ) ) { return false; } - if ( !t3.getIdentifier().getProvider().equals( "treebank" ) ) { + if ( !taxbb.getIdentifier().getProvider().equals( "EOL" ) ) { return false; } - if ( !t3.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { + if ( !taxbb.getIdentifier().getValue().equals( "704294" ) ) { return false; } - if ( !t3.getNode( "root node" ).getNodeData().getSequence().getName() - .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) { + if ( !taxbb.getTaxonomyCode().equals( "NEMVE" ) ) { return false; } - if ( !t3.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { + if ( !taxbb.getScientificName().equals( "Nematostella vectensis" ) ) { return false; } - if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { + if ( taxbb.getSynonyms().size() != 2 ) { return false; } - if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource().equals( "UniProtKB" ) ) { + if ( !taxbb.getSynonyms().contains( "Nematostella vectensis Stephenson1935" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() - .equals( "apoptosis" ) ) { + if ( !taxbb.getSynonyms().contains( "See Anemone" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getRef() - .equals( "GO:0006915" ) ) { + if ( !taxbb.getUri( 0 ).getDescription().equals( "EOL" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getSource() - .equals( "UniProtKB" ) ) { + if ( !taxbb.getUri( 0 ).getType().equals( "linkout" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getEvidence() - .equals( "experimental" ) ) { + if ( !taxbb.getUri( 0 ).getValue().toString().equals( "http://www.eol.org/pages/704294" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getType() - .equals( "function" ) ) { + if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) + .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { return false; } - if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() - .getValue() != 1 ) { + if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() - .getType().equals( "ml" ) ) { + if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 1 ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() - .equals( "apoptosis" ) ) { + if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCount() != 3 ) { return false; } - if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { + if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 3 ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { + if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCount() != 2 ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { + if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { + if ( !t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { + if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getDesc().equals( "Silurian" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { + if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getValue().toPlainString() + .equalsIgnoreCase( "435" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() - .equals( "GO:0005829" ) ) { + if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMin().toPlainString().equalsIgnoreCase( "416" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getDesc() - .equals( "intracellular organelle" ) ) { + if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMax().toPlainString() + .equalsIgnoreCase( "443.7" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { + if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getUnit().equals( "mya" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() - .equals( "UniProt link" ) ) ) { + if ( !t3_rt.getNode( "node bb" ).getNodeData().getDate().getDesc().equals( "Triassic" ) ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { + if ( !t3_rt.getNode( "node bc" ).getNodeData().getDate().getValue().toPlainString() + .equalsIgnoreCase( "433" ) ) { return false; } - final SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + final SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence() + .getCrossReferences(); if ( x.size() != 4 ) { return false; } @@ -1578,15 +2229,24 @@ public final class Test { return true; } - private static boolean testBasicPhyloXMLparsingRoundtrip() { + private static boolean testBasicPhyloXMLparsingValidating() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); - if ( USE_LOCAL_PHYLOXML_SCHEMA ) { - xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); + PhyloXmlParser xml_parser = null; + try { + xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); } - else { - xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); + catch ( final Exception e ) { + // Do nothing -- means were not running from jar. + } + if ( xml_parser == null ) { + xml_parser = PhyloXmlParser.createPhyloXmlParser(); + if ( USE_LOCAL_PHYLOXML_SCHEMA ) { + xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); + } + else { + xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); + } } final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", xml_parser ); @@ -1597,323 +2257,518 @@ public final class Test { if ( phylogenies_0.length != 4 ) { return false; } - final StringBuffer t1_sb = new StringBuffer( phylogenies_0[ 0 ].toPhyloXML( 0 ) ); - final Phylogeny[] phylogenies_t1 = factory.create( t1_sb, xml_parser ); - if ( phylogenies_t1.length != 1 ) { + final Phylogeny t1 = phylogenies_0[ 0 ]; + final Phylogeny t2 = phylogenies_0[ 1 ]; + final Phylogeny t3 = phylogenies_0[ 2 ]; + final Phylogeny t4 = phylogenies_0[ 3 ]; + if ( !t1.getName().equals( "t1" ) ) { return false; } - final Phylogeny t1_rt = phylogenies_t1[ 0 ]; - if ( !t1_rt.getDistanceUnit().equals( "cc" ) ) { + if ( !t2.getName().equals( "t2" ) ) { return false; } - if ( !t1_rt.isRooted() ) { + if ( !t3.getName().equals( "t3" ) ) { return false; } - if ( t1_rt.isRerootable() ) { + if ( !t4.getName().equals( "t4" ) ) { return false; } - if ( !t1_rt.getType().equals( "gene_tree" ) ) { + if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } - final StringBuffer t2_sb = new StringBuffer( phylogenies_0[ 1 ].toPhyloXML( 0 ) ); - final Phylogeny[] phylogenies_t2 = factory.create( t2_sb, xml_parser ); - final Phylogeny t2_rt = phylogenies_t2[ 0 ]; - if ( t2_rt.getNode( "node a" ).getNodeData().getTaxonomies().size() != 2 ) { + if ( t2.getNumberOfExternalNodes() != 2 ) { return false; } - if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 0 ).getCommonName().equals( "some parasite" ) ) { + if ( t3.getNumberOfExternalNodes() != 4 ) { return false; } - if ( !t2_rt.getNode( "node a" ).getNodeData().getTaxonomy( 1 ).getCommonName().equals( "the host" ) ) { + final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml"; + final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( "errors:" ); + System.out.println( xml_parser.getErrorMessages().toString() ); return false; } - if ( t2_rt.getNode( "node a" ).getNodeData().getSequences().size() != 2 ) { + if ( phylogenies_1.length != 4 ) { return false; } - if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 0 ).getMolecularSequence() - .startsWith( "actgtgggggt" ) ) { + final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml", + xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( "errors:" ); + System.out.println( xml_parser.getErrorMessages().toString() ); return false; } - if ( !t2_rt.getNode( "node a" ).getNodeData().getSequence( 1 ).getMolecularSequence() - .startsWith( "ctgtgatgcat" ) ) { + if ( phylogenies_2.length != 1 ) { return false; } - final StringBuffer t3_sb_0 = new StringBuffer( phylogenies_0[ 2 ].toPhyloXML( 0 ) ); - final Phylogeny[] phylogenies_1_0 = factory.create( t3_sb_0, xml_parser ); - final StringBuffer t3_sb = new StringBuffer( phylogenies_1_0[ 0 ].toPhyloXML( 0 ) ); - final Phylogeny[] phylogenies_1 = factory.create( t3_sb, xml_parser ); - if ( phylogenies_1.length != 1 ) { + if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) { return false; } - final Phylogeny t3_rt = phylogenies_1[ 0 ]; - if ( !t3_rt.getName().equals( "t3" ) ) { + final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml", + xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); return false; } - if ( t3_rt.getNumberOfExternalNodes() != 4 ) { + if ( phylogenies_3.length != 2 ) { return false; } - if ( !t3_rt.getIdentifier().getValue().equals( "1-1" ) ) { + final Phylogeny a = phylogenies_3[ 0 ]; + if ( !a.getName().equals( "tree 4" ) ) { return false; } - if ( !t3_rt.getIdentifier().getProvider().equals( "treebank" ) ) { + if ( a.getNumberOfExternalNodes() != 3 ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { + if ( !a.getNode( "node b1" ).getNodeData().getSequence().getName().equals( "b1 gene" ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getName() - .equals( "Apoptosis facilitator Bcl-2-like 14 protein" ) ) { + if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { + final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml", + xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); + return false; + } + if ( phylogenies_4.length != 1 ) { + return false; + } + final Phylogeny s = phylogenies_4[ 0 ]; + if ( s.getNumberOfExternalNodes() != 6 ) { + return false; + } + s.getNode( "first" ); + s.getNode( "<>" ); + s.getNode( "\"\"" ); + s.getNode( "'''\"" ); + s.getNode( "\"\"\"" ); + s.getNode( "dick & doof" ); + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicProtein() { + try { + final BasicProtein p0 = new BasicProtein( "p0", "owl", 0 ); + final Domain a = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain b = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain c = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain d = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e = new BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain x = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); + p0.addProteinDomain( y ); + p0.addProteinDomain( e ); + p0.addProteinDomain( b ); + p0.addProteinDomain( c ); + p0.addProteinDomain( d ); + p0.addProteinDomain( a ); + p0.addProteinDomain( x ); + if ( !p0.toDomainArchitectureString( "~" ).equals( "a~b~c~d~e~x~y" ) ) { + return false; + } + if ( !p0.toDomainArchitectureString( "~", 3, "=" ).equals( "a~b~c~d~e~x~y" ) ) { + return false; + } + // + final BasicProtein aa0 = new BasicProtein( "aa", "owl", 0 ); + final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + aa0.addProteinDomain( a1 ); + if ( !aa0.toDomainArchitectureString( "~" ).equals( "a" ) ) { + return false; + } + if ( !aa0.toDomainArchitectureString( "~", 3, "" ).equals( "a" ) ) { + return false; + } + // + final BasicProtein aa1 = new BasicProtein( "aa", "owl", 0 ); + final Domain a11 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain a12 = new BasicDomain( "a", 2, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); + aa1.addProteinDomain( a11 ); + aa1.addProteinDomain( a12 ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a" ) ) { + return false; + } + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "a~a" ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { + aa1.addProteinDomain( new BasicDomain( "a", 20, 30, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a" ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource() - .equals( "UniProtKB" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() - .equals( "apoptosis" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "a~a~a" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getRef() - .equals( "GO:0006915" ) ) { + aa1.addProteinDomain( new BasicDomain( "a", 30, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getSource() - .equals( "UniProtKB" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getEvidence() - .equals( "experimental" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getType() - .equals( "function" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a" ) ) { return false; } - if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() - .getValue() != 1 ) { + aa1.addProteinDomain( new BasicDomain( "b", 32, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() - .getType().equals( "ml" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() - .equals( "apoptosis" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa~b" ) ) { return false; } - if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { + if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { + aa1.addProteinDomain( new BasicDomain( "c", 1, 2, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "c~a~a~a~a~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "c~aaa~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "c~aaa~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { + if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "c~a~a~a~a~b" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { + // + final BasicProtein p00 = new BasicProtein( "p0", "owl", 0 ); + final Domain a0 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain b0 = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain c0 = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain d0 = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e0 = new BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e1 = new BasicDomain( "e", 61, 71, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e2 = new BasicDomain( "e", 62, 72, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e3 = new BasicDomain( "e", 63, 73, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e4 = new BasicDomain( "e", 64, 74, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e5 = new BasicDomain( "e", 65, 75, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain x0 = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y0 = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y1 = new BasicDomain( "y", 120, 130, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y2 = new BasicDomain( "y", 140, 150, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y3 = new BasicDomain( "y", 160, 170, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain z0 = new BasicDomain( "z", 200, 210, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain z1 = new BasicDomain( "z", 300, 310, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain z2 = new BasicDomain( "z", 400, 410, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain zz0 = new BasicDomain( "Z", 500, 510, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain zz1 = new BasicDomain( "Z", 600, 610, ( short ) 1, ( short ) 5, 0.1, -12 ); + p00.addProteinDomain( y0 ); + p00.addProteinDomain( e0 ); + p00.addProteinDomain( b0 ); + p00.addProteinDomain( c0 ); + p00.addProteinDomain( d0 ); + p00.addProteinDomain( a0 ); + p00.addProteinDomain( x0 ); + p00.addProteinDomain( y1 ); + p00.addProteinDomain( y2 ); + p00.addProteinDomain( y3 ); + p00.addProteinDomain( e1 ); + p00.addProteinDomain( e2 ); + p00.addProteinDomain( e3 ); + p00.addProteinDomain( e4 ); + p00.addProteinDomain( e5 ); + p00.addProteinDomain( z0 ); + p00.addProteinDomain( z1 ); + p00.addProteinDomain( z2 ); + p00.addProteinDomain( zz0 ); + p00.addProteinDomain( zz1 ); + if ( !p00.toDomainArchitectureString( "~", 3, "" ).equals( "a~b~c~d~eee~x~yyy~zzz~Z~Z" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() - .equals( "GO:0005829" ) ) { + if ( !p00.toDomainArchitectureString( "~", 4, "" ).equals( "a~b~c~d~eee~x~yyy~z~z~z~Z~Z" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 0 ) ).getDesc() - .equals( "intracellular organelle" ) ) { + if ( !p00.toDomainArchitectureString( "~", 5, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { + if ( !p00.toDomainArchitectureString( "~", 6, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() - .equals( "UniProt link" ) ) ) { + if ( !p00.toDomainArchitectureString( "~", 7, "" ).equals( "a~b~c~d~e~e~e~e~e~e~x~y~y~y~y~z~z~z~Z~Z" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { + // A0 A10 B15 A20 B25 A30 B35 B40 C50 A60 C70 D80 + final Domain A0 = new BasicDomain( "A", 0, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A10 = new BasicDomain( "A", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B15 = new BasicDomain( "B", 11, 16, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A20 = new BasicDomain( "A", 20, 100, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B25 = new BasicDomain( "B", 25, 26, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A30 = new BasicDomain( "A", 30, 31, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B35 = new BasicDomain( "B", 31, 40, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B40 = new BasicDomain( "B", 40, 600, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain C50 = new BasicDomain( "C", 50, 59, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A60 = new BasicDomain( "A", 60, 395, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain C70 = new BasicDomain( "C", 70, 71, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain D80 = new BasicDomain( "D", 80, 81, ( short ) 1, ( short ) 4, 0.1, -12 ); + final BasicProtein p = new BasicProtein( "p", "owl", 0 ); + p.addProteinDomain( B15 ); + p.addProteinDomain( C50 ); + p.addProteinDomain( A60 ); + p.addProteinDomain( A30 ); + p.addProteinDomain( C70 ); + p.addProteinDomain( B35 ); + p.addProteinDomain( B40 ); + p.addProteinDomain( A0 ); + p.addProteinDomain( A10 ); + p.addProteinDomain( A20 ); + p.addProteinDomain( B25 ); + p.addProteinDomain( D80 ); + List domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "C" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDoi().equals( "10.1038/387489a0" ) ) ) { + if ( !p.contains( domains_ids, true ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() - .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) { + domains_ids.add( "X" ); + if ( p.contains( domains_ids, false ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) { + if ( p.contains( domains_ids, true ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getScientificName().equals( "ecdysozoa" ) ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "C" ); + domains_ids.add( "D" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName().equals( "molting animals" ) ) { + if ( !p.contains( domains_ids, true ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "D" ); + domains_ids.add( "C" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getProvider() - .equals( "ncbi" ) ) { + if ( p.contains( domains_ids, true ) ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getTotalLength() != 124 ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) - .getName().equals( "B" ) ) { + if ( !p.contains( domains_ids, true ) ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) - .getFrom() != 21 ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "B" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getTo() != 44 ) { + if ( !p.contains( domains_ids, true ) ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) - .getLength() != 24 ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "B" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "C" ); + domains_ids.add( "A" ); + domains_ids.add( "C" ); + domains_ids.add( "D" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) - .getConfidence() != 2144 ) { + if ( p.contains( domains_ids, true ) ) { return false; } - if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getId() - .equals( "pfam" ) ) { + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicTable() { + try { + final BasicTable t0 = new BasicTable(); + if ( t0.getNumberOfColumns() != 0 ) { return false; } - if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 3 ) { + if ( t0.getNumberOfRows() != 0 ) { return false; } - if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) { + t0.setValue( 3, 2, "23" ); + t0.setValue( 10, 1, "error" ); + t0.setValue( 10, 1, "110" ); + t0.setValue( 9, 1, "19" ); + t0.setValue( 1, 10, "101" ); + t0.setValue( 10, 10, "1010" ); + t0.setValue( 100, 10, "10100" ); + t0.setValue( 0, 0, "00" ); + if ( !t0.getValue( 3, 2 ).equals( "23" ) ) { return false; } - if ( t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 1 ) { + if ( !t0.getValue( 10, 1 ).equals( "110" ) ) { return false; } - if ( !t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().getType().equals( "domains" ) ) { + if ( !t0.getValueAsString( 1, 10 ).equals( "101" ) ) { return false; } - final Taxonomy taxbb = t3_rt.getNode( "node bb" ).getNodeData().getTaxonomy(); - if ( !taxbb.getAuthority().equals( "Stephenson, 1935" ) ) { + if ( !t0.getValueAsString( 10, 10 ).equals( "1010" ) ) { return false; } - if ( !taxbb.getCommonName().equals( "starlet sea anemone" ) ) { + if ( !t0.getValueAsString( 100, 10 ).equals( "10100" ) ) { return false; } - if ( !taxbb.getIdentifier().getProvider().equals( "EOL" ) ) { + if ( !t0.getValueAsString( 9, 1 ).equals( "19" ) ) { return false; } - if ( !taxbb.getIdentifier().getValue().equals( "704294" ) ) { + if ( !t0.getValueAsString( 0, 0 ).equals( "00" ) ) { return false; } - if ( !taxbb.getTaxonomyCode().equals( "NEMVE" ) ) { + if ( t0.getNumberOfColumns() != 101 ) { return false; } - if ( !taxbb.getScientificName().equals( "Nematostella vectensis" ) ) { + if ( t0.getNumberOfRows() != 11 ) { return false; } - if ( taxbb.getSynonyms().size() != 2 ) { + if ( t0.getValueAsString( 49, 4 ) != null ) { return false; } - if ( !taxbb.getSynonyms().contains( "Nematostella vectensis Stephenson1935" ) ) { + final String l = ForesterUtil.getLineSeparator(); + final StringBuffer source = new StringBuffer(); + source.append( "" + l ); + source.append( "# 1 1 1 1 1 1 1 1" + l ); + source.append( " 00 01 02 03" + l ); + source.append( " 10 11 12 13 " + l ); + source.append( "20 21 22 23 " + l ); + source.append( " 30 31 32 33" + l ); + source.append( "40 41 42 43" + l ); + source.append( " # 1 1 1 1 1 " + l ); + source.append( "50 51 52 53 54" + l ); + final BasicTable t1 = BasicTableParser.parse( source.toString(), ' ' ); + if ( t1.getNumberOfColumns() != 5 ) { return false; } - if ( !taxbb.getSynonyms().contains( "See Anemone" ) ) { + if ( t1.getNumberOfRows() != 6 ) { return false; } - if ( !taxbb.getUri( 0 ).getDescription().equals( "EOL" ) ) { + if ( !t1.getValueAsString( 0, 0 ).equals( "00" ) ) { return false; } - if ( !taxbb.getUri( 0 ).getType().equals( "linkout" ) ) { + if ( !t1.getValueAsString( 1, 0 ).equals( "01" ) ) { return false; } - if ( !taxbb.getUri( 0 ).getValue().toString().equals( "http://www.eol.org/pages/704294" ) ) { + if ( !t1.getValueAsString( 3, 0 ).equals( "03" ) ) { return false; } - if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) - .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { + if ( !t1.getValueAsString( 4, 5 ).equals( "54" ) ) { return false; } - if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { + final StringBuffer source1 = new StringBuffer(); + source1.append( "" + l ); + source1.append( "# 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l ); + source1.append( " 00; 01 ;02;03" + l ); + source1.append( " 10; 11; 12; 13 " + l ); + source1.append( "20; 21; 22; 23 " + l ); + source1.append( " 30; 31; 32; 33" + l ); + source1.append( "40;41;42;43" + l ); + source1.append( " # 1 1 1 1 1 " + l ); + source1.append( ";;;50 ; ;52; 53;;54 " + l ); + final BasicTable t2 = BasicTableParser.parse( source1.toString(), ';' ); + if ( t2.getNumberOfColumns() != 5 ) { return false; } - if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCharacters().size() != 1 ) { + if ( t2.getNumberOfRows() != 6 ) { return false; } - if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCount() != 3 ) { + if ( !t2.getValueAsString( 0, 0 ).equals( "00" ) ) { return false; } - if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getLostCharacters().size() != 3 ) { + if ( !t2.getValueAsString( 1, 0 ).equals( "01" ) ) { return false; } - if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCount() != 2 ) { + if ( !t2.getValueAsString( 3, 0 ).equals( "03" ) ) { return false; } - if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getPresentCharacters().size() != 2 ) { + if ( !t2.getValueAsString( 3, 3 ).equals( "33" ) ) { return false; } - if ( !t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) { + if ( !t2.getValueAsString( 3, 5 ).equals( "53" ) ) { return false; } - // - if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getDesc().equals( "Silurian" ) ) { + if ( !t2.getValueAsString( 1, 5 ).equals( "" ) ) { return false; } - if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getValue().toPlainString() - .equalsIgnoreCase( "435" ) ) { + final StringBuffer source2 = new StringBuffer(); + source2.append( "" + l ); + source2.append( "comment: 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l ); + source2.append( " 00; 01 ;02;03" + l ); + source2.append( " 10; 11; 12; 13 " + l ); + source2.append( "20; 21; 22; 23 " + l ); + source2.append( " " + l ); + source2.append( " 30; 31; 32; 33" + l ); + source2.append( "40;41;42;43" + l ); + source2.append( " comment: 1 1 1 1 1 " + l ); + source2.append( ";;;50 ; 52; 53;;54 " + l ); + final List> tl = BasicTableParser.parse( source2.toString(), + ';', + false, + false, + "comment:", + false ); + if ( tl.size() != 2 ) { return false; } - if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMin().toPlainString().equalsIgnoreCase( "416" ) ) { + final BasicTable t3 = tl.get( 0 ); + final BasicTable t4 = tl.get( 1 ); + if ( t3.getNumberOfColumns() != 4 ) { return false; } - if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMax().toPlainString() - .equalsIgnoreCase( "443.7" ) ) { + if ( t3.getNumberOfRows() != 3 ) { return false; } - if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getUnit().equals( "mya" ) ) { + if ( t4.getNumberOfColumns() != 4 ) { return false; } - if ( !t3_rt.getNode( "node bb" ).getNodeData().getDate().getDesc().equals( "Triassic" ) ) { + if ( t4.getNumberOfRows() != 3 ) { return false; } - if ( !t3_rt.getNode( "node bc" ).getNodeData().getDate().getValue().toPlainString() - .equalsIgnoreCase( "433" ) ) { + if ( !t3.getValueAsString( 0, 0 ).equals( "00" ) ) { return false; } - final SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence() - .getCrossReferences(); - if ( x.size() != 4 ) { + if ( !t4.getValueAsString( 0, 0 ).equals( "30" ) ) { return false; } - int c = 0; - for( final Accession acc : x ) { - if ( c == 0 ) { - if ( !acc.getSource().equals( "KEGG" ) ) { - return false; - } - if ( !acc.getValue().equals( "hsa:596" ) ) { - return false; - } - } - c++; - } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -1922,384 +2777,213 @@ public final class Test { return true; } - private static boolean testBasicPhyloXMLparsingValidating() { + private static boolean testBasicTolXMLparsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - PhyloXmlParser xml_parser = null; - try { - xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); - } - catch ( final Exception e ) { - // Do nothing -- means were not running from jar. - } - if ( xml_parser == null ) { - xml_parser = PhyloXmlParser.createPhyloXmlParser(); - if ( USE_LOCAL_PHYLOXML_SCHEMA ) { - xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); - } - else { - xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); - } - } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", - xml_parser ); - if ( xml_parser.getErrorCount() > 0 ) { - System.out.println( xml_parser.getErrorMessages().toString() ); + final TolParser parser = new TolParser(); + final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2484.tol", parser ); + if ( parser.getErrorCount() > 0 ) { + System.out.println( parser.getErrorMessages().toString() ); return false; } - if ( phylogenies_0.length != 4 ) { + if ( phylogenies_0.length != 1 ) { return false; } final Phylogeny t1 = phylogenies_0[ 0 ]; - final Phylogeny t2 = phylogenies_0[ 1 ]; - final Phylogeny t3 = phylogenies_0[ 2 ]; - final Phylogeny t4 = phylogenies_0[ 3 ]; - if ( !t1.getName().equals( "t1" ) ) { - return false; - } - if ( !t2.getName().equals( "t2" ) ) { - return false; - } - if ( !t3.getName().equals( "t3" ) ) { - return false; - } - if ( !t4.getName().equals( "t4" ) ) { - return false; - } - if ( t1.getNumberOfExternalNodes() != 1 ) { - return false; - } - if ( t2.getNumberOfExternalNodes() != 2 ) { - return false; - } - if ( t3.getNumberOfExternalNodes() != 4 ) { - return false; - } - final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml"; - final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser ); - if ( xml_parser.getErrorCount() > 0 ) { - System.out.println( "errors:" ); - System.out.println( xml_parser.getErrorMessages().toString() ); - return false; - } - if ( phylogenies_1.length != 4 ) { - return false; - } - final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml", - xml_parser ); - if ( xml_parser.getErrorCount() > 0 ) { - System.out.println( "errors:" ); - System.out.println( xml_parser.getErrorMessages().toString() ); + if ( t1.getNumberOfExternalNodes() != 5 ) { return false; } - if ( phylogenies_2.length != 1 ) { + if ( !t1.isRooted() ) { return false; } - if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) { + if ( !t1.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Mesozoa" ) ) { return false; } - final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml", - xml_parser ); - if ( xml_parser.getErrorCount() > 0 ) { - System.out.println( xml_parser.getErrorMessages().toString() ); + if ( !t1.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2484" ) ) { return false; } - if ( phylogenies_3.length != 2 ) { + if ( !t1.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Rhombozoa" ) ) { return false; } - final Phylogeny a = phylogenies_3[ 0 ]; - if ( !a.getName().equals( "tree 4" ) ) { + if ( t1.getRoot().getChildNode( 0 ).getNumberOfDescendants() != 3 ) { return false; } - if ( a.getNumberOfExternalNodes() != 3 ) { + final Phylogeny[] phylogenies_1 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2.tol", parser ); + if ( parser.getErrorCount() > 0 ) { + System.out.println( parser.getErrorMessages().toString() ); return false; } - if ( !a.getNode( "node b1" ).getNodeData().getSequence().getName().equals( "b1 gene" ) ) { + if ( phylogenies_1.length != 1 ) { return false; } - if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) { + final Phylogeny t2 = phylogenies_1[ 0 ]; + if ( t2.getNumberOfExternalNodes() != 664 ) { return false; } - final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml", - xml_parser ); - if ( xml_parser.getErrorCount() > 0 ) { - System.out.println( xml_parser.getErrorMessages().toString() ); + if ( !t2.isRooted() ) { return false; } - if ( phylogenies_4.length != 1 ) { + if ( !t2.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Eubacteria" ) ) { return false; } - final Phylogeny s = phylogenies_4[ 0 ]; - if ( s.getNumberOfExternalNodes() != 6 ) { + if ( !t2.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2" ) ) { return false; } - s.getNode( "first" ); - s.getNode( "<>" ); - s.getNode( "\"\"" ); - s.getNode( "'''\"" ); - s.getNode( "\"\"\"" ); - s.getNode( "dick & doof" ); - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testBasicProtein() { - try { - final BasicProtein p0 = new BasicProtein( "p0", "owl", 0 ); - final Domain a = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain b = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain c = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain d = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e = new BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain x = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain y = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); - p0.addProteinDomain( y ); - p0.addProteinDomain( e ); - p0.addProteinDomain( b ); - p0.addProteinDomain( c ); - p0.addProteinDomain( d ); - p0.addProteinDomain( a ); - p0.addProteinDomain( x ); - if ( !p0.toDomainArchitectureString( "~" ).equals( "a~b~c~d~e~x~y" ) ) { + if ( t2.getRoot().getNumberOfDescendants() != 24 ) { return false; } - if ( !p0.toDomainArchitectureString( "~", 3, "=" ).equals( "a~b~c~d~e~x~y" ) ) { + if ( t2.getRoot().getNumberOfDescendants() != 24 ) { return false; } - // - final BasicProtein aa0 = new BasicProtein( "aa", "owl", 0 ); - final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); - aa0.addProteinDomain( a1 ); - if ( !aa0.toDomainArchitectureString( "~" ).equals( "a" ) ) { + if ( !t2.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Aquificae" ) ) { return false; } - if ( !aa0.toDomainArchitectureString( "~", 3, "" ).equals( "a" ) ) { + if ( !t2.getRoot().getChildNode( 0 ).getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName() + .equals( "Aquifex" ) ) { return false; } - // - final BasicProtein aa1 = new BasicProtein( "aa", "owl", 0 ); - final Domain a11 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain a12 = new BasicDomain( "a", 2, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); - aa1.addProteinDomain( a11 ); - aa1.addProteinDomain( a12 ); - if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a" ) ) { + final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "tol_5.tol", parser ); + if ( parser.getErrorCount() > 0 ) { + System.out.println( parser.getErrorMessages().toString() ); return false; } - if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "a~a" ) ) { + if ( phylogenies_2.length != 1 ) { return false; } - aa1.addProteinDomain( new BasicDomain( "a", 20, 30, ( short ) 1, ( short ) 5, 0.1, -12 ) ); - if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a" ) ) { + final Phylogeny t3 = phylogenies_2[ 0 ]; + if ( t3.getNumberOfExternalNodes() != 184 ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { + if ( !t3.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Viruses" ) ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "a~a~a" ) ) { + if ( !t3.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "5" ) ) { return false; } - aa1.addProteinDomain( new BasicDomain( "a", 30, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); - if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a" ) ) { + if ( t3.getRoot().getNumberOfDescendants() != 6 ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { + final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "tol_4567.tol", parser ); + if ( parser.getErrorCount() > 0 ) { + System.out.println( parser.getErrorMessages().toString() ); return false; } - if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa" ) ) { + if ( phylogenies_3.length != 1 ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a" ) ) { + final Phylogeny t4 = phylogenies_3[ 0 ]; + if ( t4.getNumberOfExternalNodes() != 1 ) { return false; } - aa1.addProteinDomain( new BasicDomain( "b", 32, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); - if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a~b" ) ) { + if ( !t4.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Marpissa decorata" ) ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa~b" ) ) { + if ( !t4.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "4567" ) ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa~b" ) ) { + if ( t4.getRoot().getNumberOfDescendants() != 0 ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a~b" ) ) { + final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "tol_16299.tol", parser ); + if ( parser.getErrorCount() > 0 ) { + System.out.println( parser.getErrorMessages().toString() ); return false; } - aa1.addProteinDomain( new BasicDomain( "c", 1, 2, ( short ) 1, ( short ) 5, 0.1, -12 ) ); - if ( !aa1.toDomainArchitectureString( "~" ).equals( "c~a~a~a~a~b" ) ) { + if ( phylogenies_4.length != 1 ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "c~aaa~b" ) ) { + final Phylogeny t5 = phylogenies_4[ 0 ]; + if ( t5.getNumberOfExternalNodes() != 13 ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "c~aaa~b" ) ) { + if ( !t5.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Hominidae" ) ) { return false; } - if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "c~a~a~a~a~b" ) ) { + if ( !t5.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "16299" ) ) { return false; } - // - final BasicProtein p00 = new BasicProtein( "p0", "owl", 0 ); - final Domain a0 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain b0 = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain c0 = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain d0 = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e0 = new BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e1 = new BasicDomain( "e", 61, 71, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e2 = new BasicDomain( "e", 62, 72, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e3 = new BasicDomain( "e", 63, 73, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e4 = new BasicDomain( "e", 64, 74, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain e5 = new BasicDomain( "e", 65, 75, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain x0 = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain y0 = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain y1 = new BasicDomain( "y", 120, 130, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain y2 = new BasicDomain( "y", 140, 150, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain y3 = new BasicDomain( "y", 160, 170, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain z0 = new BasicDomain( "z", 200, 210, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain z1 = new BasicDomain( "z", 300, 310, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain z2 = new BasicDomain( "z", 400, 410, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain zz0 = new BasicDomain( "Z", 500, 510, ( short ) 1, ( short ) 5, 0.1, -12 ); - final Domain zz1 = new BasicDomain( "Z", 600, 610, ( short ) 1, ( short ) 5, 0.1, -12 ); - p00.addProteinDomain( y0 ); - p00.addProteinDomain( e0 ); - p00.addProteinDomain( b0 ); - p00.addProteinDomain( c0 ); - p00.addProteinDomain( d0 ); - p00.addProteinDomain( a0 ); - p00.addProteinDomain( x0 ); - p00.addProteinDomain( y1 ); - p00.addProteinDomain( y2 ); - p00.addProteinDomain( y3 ); - p00.addProteinDomain( e1 ); - p00.addProteinDomain( e2 ); - p00.addProteinDomain( e3 ); - p00.addProteinDomain( e4 ); - p00.addProteinDomain( e5 ); - p00.addProteinDomain( z0 ); - p00.addProteinDomain( z1 ); - p00.addProteinDomain( z2 ); - p00.addProteinDomain( zz0 ); - p00.addProteinDomain( zz1 ); - if ( !p00.toDomainArchitectureString( "~", 3, "" ).equals( "a~b~c~d~eee~x~yyy~zzz~Z~Z" ) ) { + if ( t5.getRoot().getNumberOfDescendants() != 2 ) { return false; } - if ( !p00.toDomainArchitectureString( "~", 4, "" ).equals( "a~b~c~d~eee~x~yyy~z~z~z~Z~Z" ) ) { + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicTreeMethods() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t2 = factory.create( "((A:1,B:2)AB:1,(C:3,D:5)CD:3)ABCD:0.5", new NHXParser() )[ 0 ]; + if ( t2.getNumberOfExternalNodes() != 4 ) { return false; } - if ( !p00.toDomainArchitectureString( "~", 5, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { + if ( t2.getHeight() != 8.5 ) { return false; } - if ( !p00.toDomainArchitectureString( "~", 6, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { + if ( !t2.isCompletelyBinary() ) { return false; } - if ( !p00.toDomainArchitectureString( "~", 7, "" ).equals( "a~b~c~d~e~e~e~e~e~e~x~y~y~y~y~z~z~z~Z~Z" ) ) { + if ( t2.isEmpty() ) { return false; } - // A0 A10 B15 A20 B25 A30 B35 B40 C50 A60 C70 D80 - final Domain A0 = new BasicDomain( "A", 0, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A10 = new BasicDomain( "A", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B15 = new BasicDomain( "B", 11, 16, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A20 = new BasicDomain( "A", 20, 100, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B25 = new BasicDomain( "B", 25, 26, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A30 = new BasicDomain( "A", 30, 31, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B35 = new BasicDomain( "B", 31, 40, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B40 = new BasicDomain( "B", 40, 600, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain C50 = new BasicDomain( "C", 50, 59, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A60 = new BasicDomain( "A", 60, 395, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain C70 = new BasicDomain( "C", 70, 71, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain D80 = new BasicDomain( "D", 80, 81, ( short ) 1, ( short ) 4, 0.1, -12 ); - final BasicProtein p = new BasicProtein( "p", "owl", 0 ); - p.addProteinDomain( B15 ); - p.addProteinDomain( C50 ); - p.addProteinDomain( A60 ); - p.addProteinDomain( A30 ); - p.addProteinDomain( C70 ); - p.addProteinDomain( B35 ); - p.addProteinDomain( B40 ); - p.addProteinDomain( A0 ); - p.addProteinDomain( A10 ); - p.addProteinDomain( A20 ); - p.addProteinDomain( B25 ); - p.addProteinDomain( D80 ); - List domains_ids = new ArrayList(); - domains_ids.add( "A" ); - domains_ids.add( "B" ); - domains_ids.add( "C" ); - if ( !p.contains( domains_ids, false ) ) { + final Phylogeny t3 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3)", new NHXParser() )[ 0 ]; + if ( t3.getNumberOfExternalNodes() != 5 ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( t3.getHeight() != 11 ) { return false; } - domains_ids.add( "X" ); - if ( p.contains( domains_ids, false ) ) { + if ( t3.isCompletelyBinary() ) { return false; } - if ( p.contains( domains_ids, true ) ) { + final PhylogenyNode n = t3.getNode( "ABC" ); + final Phylogeny t4 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3,(F,G,H,I))", new NHXParser() )[ 0 ]; + if ( t4.getNumberOfExternalNodes() != 9 ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( "A" ); - domains_ids.add( "C" ); - domains_ids.add( "D" ); - if ( !p.contains( domains_ids, false ) ) { + if ( t4.getHeight() != 11 ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( t4.isCompletelyBinary() ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( "A" ); - domains_ids.add( "D" ); - domains_ids.add( "C" ); - if ( !p.contains( domains_ids, false ) ) { + final StringBuffer sb5 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); + final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; + if ( t5.getNumberOfExternalNodes() != 8 ) { return false; } - if ( p.contains( domains_ids, true ) ) { + if ( t5.getHeight() != 15 ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( "A" ); - domains_ids.add( "A" ); - domains_ids.add( "B" ); - if ( !p.contains( domains_ids, false ) ) { + final StringBuffer sb6 = new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); + final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; + if ( t6.getHeight() != 15 ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + final StringBuffer sb7 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" ); + final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; + if ( t7.getHeight() != 15 ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( "A" ); - domains_ids.add( "A" ); - domains_ids.add( "A" ); - domains_ids.add( "B" ); - domains_ids.add( "B" ); - if ( !p.contains( domains_ids, false ) ) { + final StringBuffer sb8 = new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" ); + final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; + if ( t8.getNumberOfExternalNodes() != 10 ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( t8.getHeight() != 15 ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( "A" ); - domains_ids.add( "A" ); - domains_ids.add( "B" ); - domains_ids.add( "A" ); - domains_ids.add( "B" ); - domains_ids.add( "B" ); - domains_ids.add( "A" ); - domains_ids.add( "B" ); - domains_ids.add( "C" ); - domains_ids.add( "A" ); - domains_ids.add( "C" ); - domains_ids.add( "D" ); - if ( !p.contains( domains_ids, false ) ) { + final char[] a9 = new char[] { 'a' }; + final Phylogeny t9 = factory.create( a9, new NHXParser() )[ 0 ]; + if ( t9.getHeight() != 0 ) { return false; } - if ( p.contains( domains_ids, true ) ) { + final char[] a10 = new char[] { 'a', ':', '6' }; + final Phylogeny t10 = factory.create( a10, new NHXParser() )[ 0 ]; + if ( t10.getHeight() != 6 ) { return false; } } @@ -2310,156 +2994,189 @@ public final class Test { return true; } - private static boolean testBasicTable() { + private static boolean testConfidenceAssessor() { try { - final BasicTable t0 = new BasicTable(); - if ( t0.getNumberOfColumns() != 0 ) { - return false; - } - if ( t0.getNumberOfRows() != 0 ) { - return false; - } - t0.setValue( 3, 2, "23" ); - t0.setValue( 10, 1, "error" ); - t0.setValue( 10, 1, "110" ); - t0.setValue( 9, 1, "19" ); - t0.setValue( 1, 10, "101" ); - t0.setValue( 10, 10, "1010" ); - t0.setValue( 100, 10, "10100" ); - t0.setValue( 0, 0, "00" ); - if ( !t0.getValue( 3, 2 ).equals( "23" ) ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; + final Phylogeny[] ev0 = factory + .create( "((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);", + new NHXParser() ); + ConfidenceAssessor.evaluate( "bootstrap", ev0, t0, false, 1, 0, 2 ); + if ( !isEqual( t0.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { return false; } - if ( !t0.getValue( 10, 1 ).equals( "110" ) ) { + if ( !isEqual( t0.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { return false; } - if ( !t0.getValueAsString( 1, 10 ).equals( "101" ) ) { + final Phylogeny t1 = factory.create( "((((A,B)ab[&&NHX:B=50],C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; + final Phylogeny[] ev1 = factory + .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", + new NHXParser() ); + ConfidenceAssessor.evaluate( "bootstrap", ev1, t1, false, 1 ); + if ( !isEqual( t1.getNode( "ab" ).getBranchData().getConfidence( 1 ).getValue(), 7 ) ) { return false; } - if ( !t0.getValueAsString( 10, 10 ).equals( "1010" ) ) { + if ( !isEqual( t1.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; } - if ( !t0.getValueAsString( 100, 10 ).equals( "10100" ) ) { + final Phylogeny t_b = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; + final Phylogeny[] ev_b = factory + .create( "((A,C),X);((A,X),C);(A,C);((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", + new NHXParser() ); + ConfidenceAssessor.evaluate( "bootstrap", ev_b, t_b, false, 1 ); + if ( !isEqual( t_b.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 4 ) ) { return false; } - if ( !t0.getValueAsString( 9, 1 ).equals( "19" ) ) { + if ( !isEqual( t_b.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } - if ( !t0.getValueAsString( 0, 0 ).equals( "00" ) ) { + // + final Phylogeny t1x = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; + final Phylogeny[] ev1x = factory + .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", + new NHXParser() ); + ConfidenceAssessor.evaluate( "bootstrap", ev1x, t1x, true, 1 ); + if ( !isEqual( t1x.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; } - if ( t0.getNumberOfColumns() != 101 ) { + if ( !isEqual( t1x.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; } - if ( t0.getNumberOfRows() != 11 ) { + final Phylogeny t_bx = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; + final Phylogeny[] ev_bx = factory + .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", + new NHXParser() ); + ConfidenceAssessor.evaluate( "bootstrap", ev_bx, t_bx, true, 1 ); + if ( !isEqual( t_bx.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } - if ( t0.getValueAsString( 49, 4 ) != null ) { + if ( !isEqual( t_bx.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } - final String l = ForesterUtil.getLineSeparator(); - final StringBuffer source = new StringBuffer(); - source.append( "" + l ); - source.append( "# 1 1 1 1 1 1 1 1" + l ); - source.append( " 00 01 02 03" + l ); - source.append( " 10 11 12 13 " + l ); - source.append( "20 21 22 23 " + l ); - source.append( " 30 31 32 33" + l ); - source.append( "40 41 42 43" + l ); - source.append( " # 1 1 1 1 1 " + l ); - source.append( "50 51 52 53 54" + l ); - final BasicTable t1 = BasicTableParser.parse( source.toString(), ' ' ); - if ( t1.getNumberOfColumns() != 5 ) { - return false; + final Phylogeny[] t2 = factory + .create( "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);", + new NHXParser() ); + final Phylogeny[] ev2 = factory + .create( "((((a,b),c),d),e);((((a,b),c),d),e);((((a,b),e),d),c);((((a,b),e),d),c);(((a,b),(c,d)),e);((a,b),x);((a,b),(x,y));(a,b);(a,e);(a,b,c);", + new NHXParser() ); + for( final Phylogeny target : t2 ) { + ConfidenceAssessor.evaluate( "bootstrap", ev2, target, false, 1 ); } - if ( t1.getNumberOfRows() != 6 ) { + final Phylogeny t4 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,G)abcdefg", + new NHXParser() )[ 0 ]; + final Phylogeny[] ev4 = factory.create( "(((A,B),C),(X,Y));((F,G),((A,B,C),(D,E)))", new NHXParser() ); + ConfidenceAssessor.evaluate( "bootstrap", ev4, t4, false, 1 ); + if ( !isEqual( t4.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } - if ( !t1.getValueAsString( 0, 0 ).equals( "00" ) ) { + if ( !isEqual( t4.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 2 ) ) { return false; } - if ( !t1.getValueAsString( 1, 0 ).equals( "01" ) ) { + if ( !isEqual( t4.getNode( "abcde" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } - if ( !t1.getValueAsString( 3, 0 ).equals( "03" ) ) { + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testCopyOfNodeData() { + try { + final PhylogenyNode n1 = PhylogenyNode + .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1:O=22:SO=33:SN=44:W=2:C=10.20.30:XN=S=tag1=value1=unit1]" ); + final PhylogenyNode n2 = n1.copyNodeData(); + if ( !n1.toNewHampshireX().equals( n2.toNewHampshireX() ) ) { return false; } - if ( !t1.getValueAsString( 4, 5 ).equals( "54" ) ) { + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testCreateBalancedPhylogeny() { + try { + final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 ); + if ( p0.getRoot().getNumberOfDescendants() != 5 ) { return false; } - final StringBuffer source1 = new StringBuffer(); - source1.append( "" + l ); - source1.append( "# 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l ); - source1.append( " 00; 01 ;02;03" + l ); - source1.append( " 10; 11; 12; 13 " + l ); - source1.append( "20; 21; 22; 23 " + l ); - source1.append( " 30; 31; 32; 33" + l ); - source1.append( "40;41;42;43" + l ); - source1.append( " # 1 1 1 1 1 " + l ); - source1.append( ";;;50 ; ;52; 53;;54 " + l ); - final BasicTable t2 = BasicTableParser.parse( source1.toString(), ';' ); - if ( t2.getNumberOfColumns() != 5 ) { + if ( p0.getNumberOfExternalNodes() != 15625 ) { return false; } - if ( t2.getNumberOfRows() != 6 ) { + final Phylogeny p1 = DevelopmentTools.createBalancedPhylogeny( 2, 10 ); + if ( p1.getRoot().getNumberOfDescendants() != 10 ) { return false; } - if ( !t2.getValueAsString( 0, 0 ).equals( "00" ) ) { + if ( p1.getNumberOfExternalNodes() != 100 ) { return false; } - if ( !t2.getValueAsString( 1, 0 ).equals( "01" ) ) { + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testCreateUriForSeqWeb() { + try { + final PhylogenyNode n = new PhylogenyNode(); + n.setName( "tr|B3RJ64" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { return false; } - if ( !t2.getValueAsString( 3, 0 ).equals( "03" ) ) { + n.setName( "B0LM41_HUMAN" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { return false; } - if ( !t2.getValueAsString( 3, 3 ).equals( "33" ) ) { + n.setName( "NP_001025424" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { return false; } - if ( !t2.getValueAsString( 3, 5 ).equals( "53" ) ) { + n.setName( "_NM_001030253-" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { return false; } - if ( !t2.getValueAsString( 1, 5 ).equals( "" ) ) { + n.setName( "XM_002122186" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { return false; } - final StringBuffer source2 = new StringBuffer(); - source2.append( "" + l ); - source2.append( "comment: 1; 1; 1; 1 ;1 ;1; 1 ;1;" + l ); - source2.append( " 00; 01 ;02;03" + l ); - source2.append( " 10; 11; 12; 13 " + l ); - source2.append( "20; 21; 22; 23 " + l ); - source2.append( " " + l ); - source2.append( " 30; 31; 32; 33" + l ); - source2.append( "40;41;42;43" + l ); - source2.append( " comment: 1 1 1 1 1 " + l ); - source2.append( ";;;50 ; 52; 53;;54 " + l ); - final List> tl = BasicTableParser.parse( source2.toString(), - ';', - false, - false, - "comment:", - false ); - if ( tl.size() != 2 ) { + n.setName( "dgh_AAA34956_gdg" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } - final BasicTable t3 = tl.get( 0 ); - final BasicTable t4 = tl.get( 1 ); - if ( t3.getNumberOfColumns() != 4 ) { + n.setName( "AAA34956" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } - if ( t3.getNumberOfRows() != 3 ) { + n.setName( "GI:394892" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } - if ( t4.getNumberOfColumns() != 4 ) { + n.setName( "gi_394892" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } - if ( t4.getNumberOfRows() != 3 ) { + n.setName( "gi6335_gi_394892_56635_Gi_43" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } - if ( !t3.getValueAsString( 0, 0 ).equals( "00" ) ) { + n.setName( "P12345" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } - if ( !t4.getValueAsString( 0, 0 ).equals( "30" ) ) { + n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } } @@ -2470,213 +3187,281 @@ public final class Test { return true; } - private static boolean testBasicTolXMLparsing() { + private static boolean testDataObjects() { try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final TolParser parser = new TolParser(); - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2484.tol", parser ); - if ( parser.getErrorCount() > 0 ) { - System.out.println( parser.getErrorMessages().toString() ); + final Confidence s0 = new Confidence(); + final Confidence s1 = new Confidence(); + if ( !s0.isEqual( s1 ) ) { return false; } - if ( phylogenies_0.length != 1 ) { + final Confidence s2 = new Confidence( 0.23, "bootstrap" ); + final Confidence s3 = new Confidence( 0.23, "bootstrap" ); + if ( s2.isEqual( s1 ) ) { return false; } - final Phylogeny t1 = phylogenies_0[ 0 ]; - if ( t1.getNumberOfExternalNodes() != 5 ) { + if ( !s2.isEqual( s3 ) ) { return false; } - if ( !t1.isRooted() ) { + final Confidence s4 = ( Confidence ) s3.copy(); + if ( !s4.isEqual( s3 ) ) { return false; } - if ( !t1.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Mesozoa" ) ) { + s3.asSimpleText(); + s3.asText(); + // Taxonomy + // ---------- + final Taxonomy t1 = new Taxonomy(); + final Taxonomy t2 = new Taxonomy(); + final Taxonomy t3 = new Taxonomy(); + final Taxonomy t4 = new Taxonomy(); + final Taxonomy t5 = new Taxonomy(); + t1.setIdentifier( new Identifier( "ecoli" ) ); + t1.setTaxonomyCode( "ECOLI" ); + t1.setScientificName( "E. coli" ); + t1.setCommonName( "coli" ); + final Taxonomy t0 = ( Taxonomy ) t1.copy(); + if ( !t1.isEqual( t0 ) ) { return false; } - if ( !t1.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2484" ) ) { + t2.setIdentifier( new Identifier( "ecoli" ) ); + t2.setTaxonomyCode( "OTHER" ); + t2.setScientificName( "what" ); + t2.setCommonName( "something" ); + if ( !t1.isEqual( t2 ) ) { return false; } - if ( !t1.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Rhombozoa" ) ) { + t2.setIdentifier( new Identifier( "nemve" ) ); + if ( t1.isEqual( t2 ) ) { return false; } - if ( t1.getRoot().getChildNode( 0 ).getNumberOfDescendants() != 3 ) { + t1.setIdentifier( null ); + t3.setTaxonomyCode( "ECOLI" ); + t3.setScientificName( "what" ); + t3.setCommonName( "something" ); + if ( !t1.isEqual( t3 ) ) { return false; } - final Phylogeny[] phylogenies_1 = factory.create( Test.PATH_TO_TEST_DATA + "tol_2.tol", parser ); - if ( parser.getErrorCount() > 0 ) { - System.out.println( parser.getErrorMessages().toString() ); + t1.setIdentifier( null ); + t1.setTaxonomyCode( "" ); + t4.setScientificName( "E. ColI" ); + t4.setCommonName( "something" ); + if ( !t1.isEqual( t4 ) ) { return false; } - if ( phylogenies_1.length != 1 ) { + t4.setScientificName( "B. subtilis" ); + t4.setCommonName( "something" ); + if ( t1.isEqual( t4 ) ) { return false; } - final Phylogeny t2 = phylogenies_1[ 0 ]; - if ( t2.getNumberOfExternalNodes() != 664 ) { + t1.setIdentifier( null ); + t1.setTaxonomyCode( "" ); + t1.setScientificName( "" ); + t5.setCommonName( "COLI" ); + if ( !t1.isEqual( t5 ) ) { + return false; + } + t5.setCommonName( "vibrio" ); + if ( t1.isEqual( t5 ) ) { + return false; + } + // Identifier + // ---------- + final Identifier id0 = new Identifier( "123", "pfam" ); + final Identifier id1 = ( Identifier ) id0.copy(); + if ( !id1.isEqual( id1 ) ) { + return false; + } + if ( !id1.isEqual( id0 ) ) { + return false; + } + if ( !id0.isEqual( id1 ) ) { + return false; + } + id1.asSimpleText(); + id1.asText(); + // ProteinDomain + // --------------- + final ProteinDomain pd0 = new ProteinDomain( "abc", 100, 200 ); + final ProteinDomain pd1 = ( ProteinDomain ) pd0.copy(); + if ( !pd1.isEqual( pd1 ) ) { return false; } - if ( !t2.isRooted() ) { + if ( !pd1.isEqual( pd0 ) ) { return false; } - if ( !t2.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Eubacteria" ) ) { + pd1.asSimpleText(); + pd1.asText(); + final ProteinDomain pd2 = new ProteinDomain( pd0.getName(), pd0.getFrom(), pd0.getTo(), "id" ); + final ProteinDomain pd3 = ( ProteinDomain ) pd2.copy(); + if ( !pd3.isEqual( pd3 ) ) { return false; } - if ( !t2.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2" ) ) { + if ( !pd2.isEqual( pd3 ) ) { return false; } - if ( t2.getRoot().getNumberOfDescendants() != 24 ) { + if ( !pd0.isEqual( pd3 ) ) { return false; } - if ( t2.getRoot().getNumberOfDescendants() != 24 ) { + pd3.asSimpleText(); + pd3.asText(); + // DomainArchitecture + // ------------------ + final ProteinDomain d0 = new ProteinDomain( "domain0", 10, 20 ); + final ProteinDomain d1 = new ProteinDomain( "domain1", 30, 40 ); + final ProteinDomain d2 = new ProteinDomain( "domain2", 50, 60 ); + final ProteinDomain d3 = new ProteinDomain( "domain3", 70, 80 ); + final ProteinDomain d4 = new ProteinDomain( "domain4", 90, 100 ); + final ArrayList domains0 = new ArrayList(); + domains0.add( d2 ); + domains0.add( d0 ); + domains0.add( d3 ); + domains0.add( d1 ); + final DomainArchitecture ds0 = new DomainArchitecture( domains0, 110 ); + if ( ds0.getNumberOfDomains() != 4 ) { return false; } - if ( !t2.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Aquificae" ) ) { + final DomainArchitecture ds1 = ( DomainArchitecture ) ds0.copy(); + if ( !ds0.isEqual( ds0 ) ) { return false; } - if ( !t2.getRoot().getChildNode( 0 ).getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName() - .equals( "Aquifex" ) ) { + if ( !ds0.isEqual( ds1 ) ) { return false; } - final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "tol_5.tol", parser ); - if ( parser.getErrorCount() > 0 ) { - System.out.println( parser.getErrorMessages().toString() ); + if ( ds1.getNumberOfDomains() != 4 ) { return false; } - if ( phylogenies_2.length != 1 ) { + final ArrayList domains1 = new ArrayList(); + domains1.add( d1 ); + domains1.add( d2 ); + domains1.add( d4 ); + domains1.add( d0 ); + final DomainArchitecture ds2 = new DomainArchitecture( domains1, 200 ); + if ( ds0.isEqual( ds2 ) ) { return false; } - final Phylogeny t3 = phylogenies_2[ 0 ]; - if ( t3.getNumberOfExternalNodes() != 184 ) { + ds1.asSimpleText(); + ds1.asText(); + ds1.toNHX(); + final DomainArchitecture ds3 = new DomainArchitecture( "120>30>40>0.9>b>50>60>0.4>c>10>20>0.1>a" ); + if ( !ds3.toNHX().toString().equals( ":DS=120>10>20>0.1>a>30>40>0.9>b>50>60>0.4>c" ) ) { + System.out.println( ds3.toNHX() ); return false; } - if ( !t3.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Viruses" ) ) { + if ( ds3.getNumberOfDomains() != 3 ) { return false; } - if ( !t3.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "5" ) ) { + // Event + // ----- + final Event e1 = new Event( Event.EventType.fusion ); + if ( e1.isDuplication() ) { return false; } - if ( t3.getRoot().getNumberOfDescendants() != 6 ) { + if ( !e1.isFusion() ) { return false; } - final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "tol_4567.tol", parser ); - if ( parser.getErrorCount() > 0 ) { - System.out.println( parser.getErrorMessages().toString() ); + if ( !e1.asText().toString().equals( "fusion" ) ) { return false; } - if ( phylogenies_3.length != 1 ) { + if ( !e1.asSimpleText().toString().equals( "fusion" ) ) { return false; } - final Phylogeny t4 = phylogenies_3[ 0 ]; - if ( t4.getNumberOfExternalNodes() != 1 ) { + final Event e11 = new Event( Event.EventType.fusion ); + if ( !e11.isEqual( e1 ) ) { return false; } - if ( !t4.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Marpissa decorata" ) ) { + if ( !e11.toNHX().toString().equals( "" ) ) { return false; } - if ( !t4.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "4567" ) ) { + final Event e2 = new Event( Event.EventType.speciation_or_duplication ); + if ( e2.isDuplication() ) { return false; } - if ( t4.getRoot().getNumberOfDescendants() != 0 ) { + if ( !e2.isSpeciationOrDuplication() ) { return false; } - final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "tol_16299.tol", parser ); - if ( parser.getErrorCount() > 0 ) { - System.out.println( parser.getErrorMessages().toString() ); + if ( !e2.asText().toString().equals( "speciation_or_duplication" ) ) { return false; } - if ( phylogenies_4.length != 1 ) { + if ( !e2.asSimpleText().toString().equals( "?" ) ) { return false; } - final Phylogeny t5 = phylogenies_4[ 0 ]; - if ( t5.getNumberOfExternalNodes() != 13 ) { + if ( !e2.toNHX().toString().equals( ":D=?" ) ) { return false; } - if ( !t5.getRoot().getNodeData().getTaxonomy().getScientificName().equals( "Hominidae" ) ) { + if ( e11.isEqual( e2 ) ) { return false; } - if ( !t5.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "16299" ) ) { + final Event e2c = ( Event ) e2.copy(); + if ( !e2c.isEqual( e2 ) ) { return false; } - if ( t5.getRoot().getNumberOfDescendants() != 2 ) { + Event e3 = new Event( 1, 2, 3 ); + if ( e3.isDuplication() ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testBasicTreeMethods() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t2 = factory.create( "((A:1,B:2)AB:1,(C:3,D:5)CD:3)ABCD:0.5", new NHXParser() )[ 0 ]; - if ( t2.getNumberOfExternalNodes() != 4 ) { + if ( e3.isSpeciation() ) { return false; } - if ( t2.getHeight() != 8.5 ) { + if ( e3.isGeneLoss() ) { return false; } - if ( !t2.isCompletelyBinary() ) { + if ( !e3.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { return false; } - if ( t2.isEmpty() ) { + final Event e3c = ( Event ) e3.copy(); + final Event e3cc = ( Event ) e3c.copy(); + if ( !e3c.asSimpleText().toString().equals( "D2S3L" ) ) { return false; } - final Phylogeny t3 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3)", new NHXParser() )[ 0 ]; - if ( t3.getNumberOfExternalNodes() != 5 ) { + e3 = null; + if ( !e3c.isEqual( e3cc ) ) { return false; } - if ( t3.getHeight() != 11 ) { + Event e4 = new Event( 1, 2, 3 ); + if ( !e4.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { return false; } - if ( t3.isCompletelyBinary() ) { + if ( !e4.asSimpleText().toString().equals( "D2S3L" ) ) { return false; } - final PhylogenyNode n = t3.getNode( "ABC" ); - final Phylogeny t4 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3,(F,G,H,I))", new NHXParser() )[ 0 ]; - if ( t4.getNumberOfExternalNodes() != 9 ) { + final Event e4c = ( Event ) e4.copy(); + e4 = null; + final Event e4cc = ( Event ) e4c.copy(); + if ( !e4cc.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { return false; } - if ( t4.getHeight() != 11 ) { + if ( !e4c.isEqual( e4cc ) ) { return false; } - if ( t4.isCompletelyBinary() ) { + final Event e5 = new Event(); + if ( !e5.isUnassigned() ) { return false; } - final StringBuffer sb5 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); - final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; - if ( t5.getNumberOfExternalNodes() != 8 ) { + if ( !e5.asText().toString().equals( "unassigned" ) ) { return false; } - if ( t5.getHeight() != 15 ) { + if ( !e5.asSimpleText().toString().equals( "" ) ) { return false; } - final StringBuffer sb6 = new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); - final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; - if ( t6.getHeight() != 15 ) { + final Event e6 = new Event( 1, 0, 0 ); + if ( !e6.asText().toString().equals( "duplication" ) ) { return false; } - final StringBuffer sb7 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" ); - final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; - if ( t7.getHeight() != 15 ) { + if ( !e6.asSimpleText().toString().equals( "D" ) ) { return false; } - final StringBuffer sb8 = new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" ); - final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; - if ( t8.getNumberOfExternalNodes() != 10 ) { + final Event e7 = new Event( 0, 1, 0 ); + if ( !e7.asText().toString().equals( "speciation" ) ) { return false; } - if ( t8.getHeight() != 15 ) { + if ( !e7.asSimpleText().toString().equals( "S" ) ) { return false; } - final char[] a9 = new char[] { 'a' }; - final Phylogeny t9 = factory.create( a9, new NHXParser() )[ 0 ]; - if ( t9.getHeight() != 0 ) { + final Event e8 = new Event( 0, 0, 1 ); + if ( !e8.asText().toString().equals( "gene-loss" ) ) { return false; } - final char[] a10 = new char[] { 'a', ':', '6' }; - final Phylogeny t10 = factory.create( a10, new NHXParser() )[ 0 ]; - if ( t10.getHeight() != 6 ) { + if ( !e8.asSimpleText().toString().equals( "L" ) ) { return false; } } @@ -2687,813 +3472,790 @@ public final class Test { return true; } - private static boolean testConfidenceAssessor() { + private static boolean testDeletionOfExternalNodes() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev0 = factory - .create( "((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);", - new NHXParser() ); - ConfidenceAssessor.evaluate( "bootstrap", ev0, t0, false, 1, 0, 2 ); - if ( !isEqual( t0.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { + final Phylogeny t0 = factory.create( "A", new NHXParser() )[ 0 ]; + final PhylogenyWriter w = new PhylogenyWriter(); + if ( t0.isEmpty() ) { return false; } - if ( !isEqual( t0.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { + if ( t0.getNumberOfExternalNodes() != 1 ) { return false; } - final Phylogeny t1 = factory.create( "((((A,B)ab[&&NHX:B=50],C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev1 = factory - .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", - new NHXParser() ); - ConfidenceAssessor.evaluate( "bootstrap", ev1, t1, false, 1 ); - if ( !isEqual( t1.getNode( "ab" ).getBranchData().getConfidence( 1 ).getValue(), 7 ) ) { + t0.deleteSubtree( t0.getNode( "A" ), false ); + if ( t0.getNumberOfExternalNodes() != 0 ) { return false; } - if ( !isEqual( t1.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { + if ( !t0.isEmpty() ) { return false; } - final Phylogeny t_b = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev_b = factory - .create( "((A,C),X);((A,X),C);(A,C);((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", - new NHXParser() ); - ConfidenceAssessor.evaluate( "bootstrap", ev_b, t_b, false, 1 ); - if ( !isEqual( t_b.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 4 ) ) { + final Phylogeny t1 = factory.create( "(A,B)r", new NHXParser() )[ 0 ]; + if ( t1.getNumberOfExternalNodes() != 2 ) { return false; } - if ( !isEqual( t_b.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { + t1.deleteSubtree( t1.getNode( "A" ), false ); + if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } - // - final Phylogeny t1x = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev1x = factory - .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", - new NHXParser() ); - ConfidenceAssessor.evaluate( "bootstrap", ev1x, t1x, true, 1 ); - if ( !isEqual( t1x.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { + if ( !t1.getNode( "B" ).getName().equals( "B" ) ) { return false; } - if ( !isEqual( t1x.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { + t1.deleteSubtree( t1.getNode( "B" ), false ); + if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } - final Phylogeny t_bx = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev_bx = factory - .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", - new NHXParser() ); - ConfidenceAssessor.evaluate( "bootstrap", ev_bx, t_bx, true, 1 ); - if ( !isEqual( t_bx.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { + t1.deleteSubtree( t1.getNode( "r" ), false ); + if ( !t1.isEmpty() ) { return false; } - if ( !isEqual( t_bx.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { + final Phylogeny t2 = factory.create( "((A,B),C)", new NHXParser() )[ 0 ]; + if ( t2.getNumberOfExternalNodes() != 3 ) { return false; } - // - final Phylogeny[] t2 = factory - .create( "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);", - new NHXParser() ); - final Phylogeny[] ev2 = factory - .create( "((((a,b),c),d),e);((((a,b),c),d),e);((((a,b),e),d),c);((((a,b),e),d),c);(((a,b),(c,d)),e);((a,b),x);((a,b),(x,y));(a,b);(a,e);(a,b,c);", - new NHXParser() ); - for( final Phylogeny target : t2 ) { - ConfidenceAssessor.evaluate( "bootstrap", ev2, target, false, 1 ); + t2.deleteSubtree( t2.getNode( "B" ), false ); + if ( t2.getNumberOfExternalNodes() != 2 ) { + return false; + } + t2.toNewHampshireX(); + PhylogenyNode n = t2.getNode( "A" ); + if ( !n.getNextExternalNode().getName().equals( "C" ) ) { + return false; + } + t2.deleteSubtree( t2.getNode( "A" ), false ); + if ( t2.getNumberOfExternalNodes() != 2 ) { + return false; + } + t2.deleteSubtree( t2.getNode( "C" ), true ); + if ( t2.getNumberOfExternalNodes() != 1 ) { + return false; + } + final Phylogeny t3 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; + if ( t3.getNumberOfExternalNodes() != 4 ) { + return false; } - // - final Phylogeny t4 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,G)abcdefg", - new NHXParser() )[ 0 ]; - final Phylogeny[] ev4 = factory.create( "(((A,B),C),(X,Y));((F,G),((A,B,C),(D,E)))", new NHXParser() ); - ConfidenceAssessor.evaluate( "bootstrap", ev4, t4, false, 1 ); - if ( !isEqual( t4.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { + t3.deleteSubtree( t3.getNode( "B" ), true ); + if ( t3.getNumberOfExternalNodes() != 3 ) { return false; } - if ( !isEqual( t4.getNode( "abc" ).getBranchData().getConfidence( 0 ).getValue(), 2 ) ) { + n = t3.getNode( "A" ); + if ( !n.getNextExternalNode().getName().equals( "C" ) ) { return false; } - if ( !isEqual( t4.getNode( "abcde" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { + n = n.getNextExternalNode(); + if ( !n.getNextExternalNode().getName().equals( "D" ) ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace(); - return false; - } - return true; - } - - private static boolean testCopyOfNodeData() { - try { - final PhylogenyNode n1 = PhylogenyNode - .createInstanceFromNhxString( "n5:0.1[&&NHX:S=Ecoli:E=1.1.1.1:D=Y:Co=Y:B=56:T=1:O=22:SO=33:SN=44:W=2:C=10.20.30:XN=S=tag1=value1=unit1]" ); - final PhylogenyNode n2 = n1.copyNodeData(); - if ( !n1.toNewHampshireX().equals( n2.toNewHampshireX() ) ) { + t3.deleteSubtree( t3.getNode( "A" ), true ); + if ( t3.getNumberOfExternalNodes() != 2 ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace(); - return false; - } - return true; - } - - private static boolean testTreeCopy() { - try { - final String str_0 = "((((a,b),c),d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=animals]"; - final Phylogeny t0 = Phylogeny.createInstanceFromNhxString( str_0 ); - final Phylogeny t1 = t0.copy(); - if ( !t1.toNewHampshireX().equals( t0.toNewHampshireX() ) ) { + n = t3.getNode( "C" ); + if ( !n.getNextExternalNode().getName().equals( "D" ) ) { return false; } - if ( !t1.toNewHampshireX().equals( str_0 ) ) { + t3.deleteSubtree( t3.getNode( "C" ), true ); + if ( t3.getNumberOfExternalNodes() != 1 ) { return false; } - t0.deleteSubtree( t0.getNode( "c" ), true ); - t0.deleteSubtree( t0.getNode( "a" ), true ); - t0.getRoot().getNodeData().getTaxonomy().setScientificName( "metazoa" ); - t0.getNode( "b" ).setName( "Bee" ); - if ( !t0.toNewHampshireX().equals( "((Bee,d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=metazoa]" ) ) { + t3.deleteSubtree( t3.getNode( "D" ), true ); + if ( t3.getNumberOfExternalNodes() != 0 ) { return false; } - if ( !t1.toNewHampshireX().equals( str_0 ) ) { + final Phylogeny t4 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + if ( t4.getNumberOfExternalNodes() != 6 ) { return false; } - t0.deleteSubtree( t0.getNode( "e" ), true ); - t0.deleteSubtree( t0.getNode( "Bee" ), true ); - t0.deleteSubtree( t0.getNode( "d" ), true ); - if ( !t1.toNewHampshireX().equals( str_0 ) ) { + t4.deleteSubtree( t4.getNode( "B2" ), true ); + if ( t4.getNumberOfExternalNodes() != 5 ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace(); - return false; - } - return true; - } - - private static boolean testCreateBalancedPhylogeny() { - try { - final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 ); - if ( p0.getRoot().getNumberOfDescendants() != 5 ) { + String s = w.toNewHampshire( t4, true ).toString(); + if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } - if ( p0.getNumberOfExternalNodes() != 15625 ) { + t4.deleteSubtree( t4.getNode( "B11" ), true ); + if ( t4.getNumberOfExternalNodes() != 4 ) { return false; } - final Phylogeny p1 = DevelopmentTools.createBalancedPhylogeny( 2, 10 ); - if ( p1.getRoot().getNumberOfDescendants() != 10 ) { + t4.deleteSubtree( t4.getNode( "C" ), true ); + if ( t4.getNumberOfExternalNodes() != 3 ) { return false; } - if ( p1.getNumberOfExternalNodes() != 100 ) { + n = t4.getNode( "A" ); + n = n.getNextExternalNode(); + if ( !n.getName().equals( "B12" ) ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace(); - return false; - } - return true; - } - - private static boolean testCreateUriForSeqWeb() { - try { - final PhylogenyNode n = new PhylogenyNode(); - n.setName( "tr|B3RJ64" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "D" ) ) { return false; } - n.setName( "B0LM41_HUMAN" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { + s = w.toNewHampshire( t4, true ).toString(); + if ( !s.equals( "((A,B12),D);" ) ) { return false; } - n.setName( "NP_001025424" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { + final Phylogeny t5 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + t5.deleteSubtree( t5.getNode( "A" ), true ); + if ( t5.getNumberOfExternalNodes() != 5 ) { return false; } - n.setName( "_NM_001030253-" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { + s = w.toNewHampshire( t5, true ).toString(); + if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) { return false; } - n.setName( "XM_002122186" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { + final Phylogeny t6 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + t6.deleteSubtree( t6.getNode( "B11" ), true ); + if ( t6.getNumberOfExternalNodes() != 5 ) { return false; } - n.setName( "dgh_AAA34956_gdg" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { + s = w.toNewHampshire( t6, false ).toString(); + if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) { return false; } - n.setName( "AAA34956" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { + final Phylogeny t7 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + t7.deleteSubtree( t7.getNode( "B12" ), true ); + if ( t7.getNumberOfExternalNodes() != 5 ) { return false; } - n.setName( "GI:394892" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + s = w.toNewHampshire( t7, true ).toString(); + if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) { return false; } - n.setName( "gi_394892" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + final Phylogeny t8 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + t8.deleteSubtree( t8.getNode( "B2" ), true ); + if ( t8.getNumberOfExternalNodes() != 5 ) { return false; } - n.setName( "gi6335_gi_394892_56635_Gi_43" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + s = w.toNewHampshire( t8, false ).toString(); + if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } - n.setName( "P12345" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { - System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + final Phylogeny t9 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + t9.deleteSubtree( t9.getNode( "C" ), true ); + if ( t9.getNumberOfExternalNodes() != 5 ) { return false; } - n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { - System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + s = w.toNewHampshire( t9, true ).toString(); + if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testDataObjects() { - try { - final Confidence s0 = new Confidence(); - final Confidence s1 = new Confidence(); - if ( !s0.isEqual( s1 ) ) { + final Phylogeny t10 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; + t10.deleteSubtree( t10.getNode( "D" ), true ); + if ( t10.getNumberOfExternalNodes() != 5 ) { return false; } - final Confidence s2 = new Confidence( 0.23, "bootstrap" ); - final Confidence s3 = new Confidence( 0.23, "bootstrap" ); - if ( s2.isEqual( s1 ) ) { + s = w.toNewHampshire( t10, true ).toString(); + if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) { return false; } - if ( !s2.isEqual( s3 ) ) { + final Phylogeny t11 = factory.create( "(A,B,C)", new NHXParser() )[ 0 ]; + t11.deleteSubtree( t11.getNode( "A" ), true ); + if ( t11.getNumberOfExternalNodes() != 2 ) { return false; } - final Confidence s4 = ( Confidence ) s3.copy(); - if ( !s4.isEqual( s3 ) ) { + s = w.toNewHampshire( t11, true ).toString(); + if ( !s.equals( "(B,C);" ) ) { return false; } - s3.asSimpleText(); - s3.asText(); - // Taxonomy - // ---------- - final Taxonomy t1 = new Taxonomy(); - final Taxonomy t2 = new Taxonomy(); - final Taxonomy t3 = new Taxonomy(); - final Taxonomy t4 = new Taxonomy(); - final Taxonomy t5 = new Taxonomy(); - t1.setIdentifier( new Identifier( "ecoli" ) ); - t1.setTaxonomyCode( "ECOLI" ); - t1.setScientificName( "E. coli" ); - t1.setCommonName( "coli" ); - final Taxonomy t0 = ( Taxonomy ) t1.copy(); - if ( !t1.isEqual( t0 ) ) { + t11.deleteSubtree( t11.getNode( "C" ), true ); + if ( t11.getNumberOfExternalNodes() != 1 ) { return false; } - t2.setIdentifier( new Identifier( "ecoli" ) ); - t2.setTaxonomyCode( "OTHER" ); - t2.setScientificName( "what" ); - t2.setCommonName( "something" ); - if ( !t1.isEqual( t2 ) ) { + s = w.toNewHampshire( t11, false ).toString(); + if ( !s.equals( "B;" ) ) { return false; } - t2.setIdentifier( new Identifier( "nemve" ) ); - if ( t1.isEqual( t2 ) ) { + final Phylogeny t12 = factory.create( "((A1,A2,A3),(B1,B2,B3),(C1,C2,C3))", new NHXParser() )[ 0 ]; + t12.deleteSubtree( t12.getNode( "B2" ), true ); + if ( t12.getNumberOfExternalNodes() != 8 ) { return false; } - t1.setIdentifier( null ); - t3.setTaxonomyCode( "ECOLI" ); - t3.setScientificName( "what" ); - t3.setCommonName( "something" ); - if ( !t1.isEqual( t3 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) { return false; } - t1.setIdentifier( null ); - t1.setTaxonomyCode( "" ); - t4.setScientificName( "E. ColI" ); - t4.setCommonName( "something" ); - if ( !t1.isEqual( t4 ) ) { + t12.deleteSubtree( t12.getNode( "B3" ), true ); + if ( t12.getNumberOfExternalNodes() != 7 ) { return false; } - t4.setScientificName( "B. subtilis" ); - t4.setCommonName( "something" ); - if ( t1.isEqual( t4 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) { return false; } - t1.setIdentifier( null ); - t1.setTaxonomyCode( "" ); - t1.setScientificName( "" ); - t5.setCommonName( "COLI" ); - if ( !t1.isEqual( t5 ) ) { + t12.deleteSubtree( t12.getNode( "C3" ), true ); + if ( t12.getNumberOfExternalNodes() != 6 ) { return false; } - t5.setCommonName( "vibrio" ); - if ( t1.isEqual( t5 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) { return false; } - // Identifier - // ---------- - final Identifier id0 = new Identifier( "123", "pfam" ); - final Identifier id1 = ( Identifier ) id0.copy(); - if ( !id1.isEqual( id1 ) ) { + t12.deleteSubtree( t12.getNode( "A1" ), true ); + if ( t12.getNumberOfExternalNodes() != 5 ) { return false; } - if ( !id1.isEqual( id0 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) { return false; } - if ( !id0.isEqual( id1 ) ) { + t12.deleteSubtree( t12.getNode( "B1" ), true ); + if ( t12.getNumberOfExternalNodes() != 4 ) { return false; } - id1.asSimpleText(); - id1.asText(); - // ProteinDomain - // --------------- - final ProteinDomain pd0 = new ProteinDomain( "abc", 100, 200 ); - final ProteinDomain pd1 = ( ProteinDomain ) pd0.copy(); - if ( !pd1.isEqual( pd1 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "((A2,A3),(C1,C2));" ) ) { return false; } - if ( !pd1.isEqual( pd0 ) ) { + t12.deleteSubtree( t12.getNode( "A3" ), true ); + if ( t12.getNumberOfExternalNodes() != 3 ) { return false; } - pd1.asSimpleText(); - pd1.asText(); - final ProteinDomain pd2 = new ProteinDomain( pd0.getName(), pd0.getFrom(), pd0.getTo(), "id" ); - final ProteinDomain pd3 = ( ProteinDomain ) pd2.copy(); - if ( !pd3.isEqual( pd3 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "(A2,(C1,C2));" ) ) { return false; } - if ( !pd2.isEqual( pd3 ) ) { + t12.deleteSubtree( t12.getNode( "A2" ), true ); + if ( t12.getNumberOfExternalNodes() != 2 ) { return false; } - if ( !pd0.isEqual( pd3 ) ) { + s = w.toNewHampshire( t12, true ).toString(); + if ( !s.equals( "(C1,C2);" ) ) { return false; } - pd3.asSimpleText(); - pd3.asText(); - // DomainArchitecture - // ------------------ - final ProteinDomain d0 = new ProteinDomain( "domain0", 10, 20 ); - final ProteinDomain d1 = new ProteinDomain( "domain1", 30, 40 ); - final ProteinDomain d2 = new ProteinDomain( "domain2", 50, 60 ); - final ProteinDomain d3 = new ProteinDomain( "domain3", 70, 80 ); - final ProteinDomain d4 = new ProteinDomain( "domain4", 90, 100 ); - final ArrayList domains0 = new ArrayList(); - domains0.add( d2 ); - domains0.add( d0 ); - domains0.add( d3 ); - domains0.add( d1 ); - final DomainArchitecture ds0 = new DomainArchitecture( domains0, 110 ); - if ( ds0.getNumberOfDomains() != 4 ) { + final Phylogeny t13 = factory.create( "(A,B,C,(D:1.0,E:2.0):3.0)", new NHXParser() )[ 0 ]; + t13.deleteSubtree( t13.getNode( "D" ), true ); + if ( t13.getNumberOfExternalNodes() != 4 ) { return false; } - final DomainArchitecture ds1 = ( DomainArchitecture ) ds0.copy(); - if ( !ds0.isEqual( ds0 ) ) { + s = w.toNewHampshire( t13, true ).toString(); + if ( !s.equals( "(A,B,C,E:5.0);" ) ) { return false; } - if ( !ds0.isEqual( ds1 ) ) { + final Phylogeny t14 = factory.create( "((A,B,C,(D:0.1,E:0.4):1.0),F)", new NHXParser() )[ 0 ]; + t14.deleteSubtree( t14.getNode( "E" ), true ); + if ( t14.getNumberOfExternalNodes() != 5 ) { return false; } - if ( ds1.getNumberOfDomains() != 4 ) { + s = w.toNewHampshire( t14, true ).toString(); + if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) { return false; } - final ArrayList domains1 = new ArrayList(); - domains1.add( d1 ); - domains1.add( d2 ); - domains1.add( d4 ); - domains1.add( d0 ); - final DomainArchitecture ds2 = new DomainArchitecture( domains1, 200 ); - if ( ds0.isEqual( ds2 ) ) { + final Phylogeny t15 = factory.create( "((A1,A2,A3,A4),(B1,B2,B3,B4),(C1,C2,C3,C4))", new NHXParser() )[ 0 ]; + t15.deleteSubtree( t15.getNode( "B2" ), true ); + if ( t15.getNumberOfExternalNodes() != 11 ) { return false; } - ds1.asSimpleText(); - ds1.asText(); - ds1.toNHX(); - final DomainArchitecture ds3 = new DomainArchitecture( "120>30>40>0.9>b>50>60>0.4>c>10>20>0.1>a" ); - if ( !ds3.toNHX().toString().equals( ":DS=120>10>20>0.1>a>30>40>0.9>b>50>60>0.4>c" ) ) { - System.out.println( ds3.toNHX() ); + t15.deleteSubtree( t15.getNode( "B1" ), true ); + if ( t15.getNumberOfExternalNodes() != 10 ) { return false; } - if ( ds3.getNumberOfDomains() != 3 ) { + t15.deleteSubtree( t15.getNode( "B3" ), true ); + if ( t15.getNumberOfExternalNodes() != 9 ) { return false; } - // Event - // ----- - final Event e1 = new Event( Event.EventType.fusion ); - if ( e1.isDuplication() ) { + t15.deleteSubtree( t15.getNode( "B4" ), true ); + if ( t15.getNumberOfExternalNodes() != 8 ) { return false; } - if ( !e1.isFusion() ) { + t15.deleteSubtree( t15.getNode( "A1" ), true ); + if ( t15.getNumberOfExternalNodes() != 7 ) { return false; } - if ( !e1.asText().toString().equals( "fusion" ) ) { + t15.deleteSubtree( t15.getNode( "C4" ), true ); + if ( t15.getNumberOfExternalNodes() != 6 ) { return false; } - if ( !e1.asSimpleText().toString().equals( "fusion" ) ) { + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testDescriptiveStatistics() { + try { + final DescriptiveStatistics dss1 = new BasicDescriptiveStatistics(); + dss1.addValue( 82 ); + dss1.addValue( 78 ); + dss1.addValue( 70 ); + dss1.addValue( 58 ); + dss1.addValue( 42 ); + if ( dss1.getN() != 5 ) { return false; } - final Event e11 = new Event( Event.EventType.fusion ); - if ( !e11.isEqual( e1 ) ) { + if ( !Test.isEqual( dss1.getMin(), 42 ) ) { return false; } - if ( !e11.toNHX().toString().equals( "" ) ) { + if ( !Test.isEqual( dss1.getMax(), 82 ) ) { return false; } - final Event e2 = new Event( Event.EventType.speciation_or_duplication ); - if ( e2.isDuplication() ) { + if ( !Test.isEqual( dss1.arithmeticMean(), 66 ) ) { return false; } - if ( !e2.isSpeciationOrDuplication() ) { + if ( !Test.isEqual( dss1.sampleStandardDeviation(), 16.24807680927192 ) ) { return false; } - if ( !e2.asText().toString().equals( "speciation_or_duplication" ) ) { + if ( !Test.isEqual( dss1.median(), 70 ) ) { return false; } - if ( !e2.asSimpleText().toString().equals( "?" ) ) { + if ( !Test.isEqual( dss1.midrange(), 62 ) ) { return false; } - if ( !e2.toNHX().toString().equals( ":D=?" ) ) { + if ( !Test.isEqual( dss1.sampleVariance(), 264 ) ) { return false; } - if ( e11.isEqual( e2 ) ) { + if ( !Test.isEqual( dss1.pearsonianSkewness(), -0.7385489458759964 ) ) { return false; } - final Event e2c = ( Event ) e2.copy(); - if ( !e2c.isEqual( e2 ) ) { + if ( !Test.isEqual( dss1.coefficientOfVariation(), 0.24618298195866547 ) ) { return false; } - Event e3 = new Event( 1, 2, 3 ); - if ( e3.isDuplication() ) { + if ( !Test.isEqual( dss1.sampleStandardUnit( 66 - 16.24807680927192 ), -1.0 ) ) { return false; } - if ( e3.isSpeciation() ) { + if ( !Test.isEqual( dss1.getValue( 1 ), 78 ) ) { return false; } - if ( e3.isGeneLoss() ) { + dss1.addValue( 123 ); + if ( !Test.isEqual( dss1.arithmeticMean(), 75.5 ) ) { return false; } - if ( !e3.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { + if ( !Test.isEqual( dss1.getMax(), 123 ) ) { return false; } - final Event e3c = ( Event ) e3.copy(); - final Event e3cc = ( Event ) e3c.copy(); - if ( !e3c.asSimpleText().toString().equals( "D2S3L" ) ) { + if ( !Test.isEqual( dss1.standardErrorOfMean(), 11.200446419674531 ) ) { return false; } - e3 = null; - if ( !e3c.isEqual( e3cc ) ) { + final DescriptiveStatistics dss2 = new BasicDescriptiveStatistics(); + dss2.addValue( -1.85 ); + dss2.addValue( 57.5 ); + dss2.addValue( 92.78 ); + dss2.addValue( 57.78 ); + if ( !Test.isEqual( dss2.median(), 57.64 ) ) { return false; } - Event e4 = new Event( 1, 2, 3 ); - if ( !e4.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { + if ( !Test.isEqual( dss2.sampleStandardDeviation(), 39.266984753946495 ) ) { return false; } - if ( !e4.asSimpleText().toString().equals( "D2S3L" ) ) { + final double[] a = dss2.getDataAsDoubleArray(); + if ( !Test.isEqual( a[ 3 ], 57.78 ) ) { return false; } - final Event e4c = ( Event ) e4.copy(); - e4 = null; - final Event e4cc = ( Event ) e4c.copy(); - if ( !e4cc.asText().toString().equals( "duplications [1] speciations [2] gene-losses [3]" ) ) { + dss2.addValue( -100 ); + if ( !Test.isEqual( dss2.sampleStandardDeviation(), 75.829111296388 ) ) { return false; } - if ( !e4c.isEqual( e4cc ) ) { + if ( !Test.isEqual( dss2.sampleVariance(), 5750.05412 ) ) { return false; } - final Event e5 = new Event(); - if ( !e5.isUnassigned() ) { + final double[] ds = new double[ 14 ]; + ds[ 0 ] = 34; + ds[ 1 ] = 23; + ds[ 2 ] = 1; + ds[ 3 ] = 32; + ds[ 4 ] = 11; + ds[ 5 ] = 2; + ds[ 6 ] = 12; + ds[ 7 ] = 33; + ds[ 8 ] = 13; + ds[ 9 ] = 22; + ds[ 10 ] = 21; + ds[ 11 ] = 35; + ds[ 12 ] = 24; + ds[ 13 ] = 31; + final int[] bins = BasicDescriptiveStatistics.performBinning( ds, 0, 40, 4 ); + if ( bins.length != 4 ) { return false; } - if ( !e5.asText().toString().equals( "unassigned" ) ) { + if ( bins[ 0 ] != 2 ) { return false; } - if ( !e5.asSimpleText().toString().equals( "" ) ) { + if ( bins[ 1 ] != 3 ) { return false; } - final Event e6 = new Event( 1, 0, 0 ); - if ( !e6.asText().toString().equals( "duplication" ) ) { + if ( bins[ 2 ] != 4 ) { return false; } - if ( !e6.asSimpleText().toString().equals( "D" ) ) { + if ( bins[ 3 ] != 5 ) { return false; } - final Event e7 = new Event( 0, 1, 0 ); - if ( !e7.asText().toString().equals( "speciation" ) ) { + final double[] ds1 = new double[ 9 ]; + ds1[ 0 ] = 10.0; + ds1[ 1 ] = 19.0; + ds1[ 2 ] = 9.999; + ds1[ 3 ] = 0.0; + ds1[ 4 ] = 39.9; + ds1[ 5 ] = 39.999; + ds1[ 6 ] = 30.0; + ds1[ 7 ] = 19.999; + ds1[ 8 ] = 30.1; + final int[] bins1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 4 ); + if ( bins1.length != 4 ) { return false; } - if ( !e7.asSimpleText().toString().equals( "S" ) ) { + if ( bins1[ 0 ] != 2 ) { return false; } - final Event e8 = new Event( 0, 0, 1 ); - if ( !e8.asText().toString().equals( "gene-loss" ) ) { + if ( bins1[ 1 ] != 3 ) { return false; } - if ( !e8.asSimpleText().toString().equals( "L" ) ) { + if ( bins1[ 2 ] != 0 ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testDeletionOfExternalNodes() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t0 = factory.create( "A", new NHXParser() )[ 0 ]; - final PhylogenyWriter w = new PhylogenyWriter(); - if ( t0.isEmpty() ) { + if ( bins1[ 3 ] != 4 ) { return false; } - if ( t0.getNumberOfExternalNodes() != 1 ) { + final int[] bins1_1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 3 ); + if ( bins1_1.length != 3 ) { return false; } - t0.deleteSubtree( t0.getNode( "A" ), false ); - if ( t0.getNumberOfExternalNodes() != 0 ) { + if ( bins1_1[ 0 ] != 3 ) { return false; } - if ( !t0.isEmpty() ) { + if ( bins1_1[ 1 ] != 2 ) { return false; } - final Phylogeny t1 = factory.create( "(A,B)r", new NHXParser() )[ 0 ]; - if ( t1.getNumberOfExternalNodes() != 2 ) { + if ( bins1_1[ 2 ] != 4 ) { return false; } - t1.deleteSubtree( t1.getNode( "A" ), false ); - if ( t1.getNumberOfExternalNodes() != 1 ) { + final int[] bins1_2 = BasicDescriptiveStatistics.performBinning( ds1, 1, 39, 3 ); + if ( bins1_2.length != 3 ) { return false; } - if ( !t1.getNode( "B" ).getName().equals( "B" ) ) { + if ( bins1_2[ 0 ] != 2 ) { return false; } - t1.deleteSubtree( t1.getNode( "B" ), false ); - if ( t1.getNumberOfExternalNodes() != 1 ) { + if ( bins1_2[ 1 ] != 2 ) { return false; } - t1.deleteSubtree( t1.getNode( "r" ), false ); - if ( !t1.isEmpty() ) { + if ( bins1_2[ 2 ] != 2 ) { return false; } - final Phylogeny t2 = factory.create( "((A,B),C)", new NHXParser() )[ 0 ]; - if ( t2.getNumberOfExternalNodes() != 3 ) { + final DescriptiveStatistics dss3 = new BasicDescriptiveStatistics(); + dss3.addValue( 1 ); + dss3.addValue( 1 ); + dss3.addValue( 1 ); + dss3.addValue( 2 ); + dss3.addValue( 3 ); + dss3.addValue( 4 ); + dss3.addValue( 5 ); + dss3.addValue( 5 ); + dss3.addValue( 5 ); + dss3.addValue( 6 ); + dss3.addValue( 7 ); + dss3.addValue( 8 ); + dss3.addValue( 9 ); + dss3.addValue( 10 ); + dss3.addValue( 10 ); + dss3.addValue( 10 ); + final AsciiHistogram histo = new AsciiHistogram( dss3 ); + histo.toStringBuffer( 10, '=', 40, 5 ); + histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null ); + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testDir( final String file ) { + try { + final File f = new File( file ); + if ( !f.exists() ) { return false; } - t2.deleteSubtree( t2.getNode( "B" ), false ); - if ( t2.getNumberOfExternalNodes() != 2 ) { + if ( !f.isDirectory() ) { return false; } - t2.toNewHampshireX(); - PhylogenyNode n = t2.getNode( "A" ); - if ( !n.getNextExternalNode().getName().equals( "C" ) ) { + if ( !f.canRead() ) { return false; } - t2.deleteSubtree( t2.getNode( "A" ), false ); - if ( t2.getNumberOfExternalNodes() != 2 ) { + } + catch ( final Exception e ) { + return false; + } + return true; + } + + private static boolean testEbiEntryRetrieval() { + try { + final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEntry( "AAK41263" ); + if ( !entry.getAccession().equals( "AAK41263" ) ) { + System.out.println( entry.getAccession() ); return false; } - t2.deleteSubtree( t2.getNode( "C" ), true ); - if ( t2.getNumberOfExternalNodes() != 1 ) { + if ( !entry.getTaxonomyScientificName().equals( "Sulfolobus solfataricus P2" ) ) { + System.out.println( entry.getTaxonomyScientificName() ); return false; } - final Phylogeny t3 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; - if ( t3.getNumberOfExternalNodes() != 4 ) { + if ( !entry.getSequenceName() + .equals( "Sulfolobus solfataricus P2 Glycogen debranching enzyme, hypothetical (treX-like)" ) ) { + System.out.println( entry.getSequenceName() ); return false; } - t3.deleteSubtree( t3.getNode( "B" ), true ); - if ( t3.getNumberOfExternalNodes() != 3 ) { + if ( !entry.getGeneName().equals( "treX-like" ) ) { + System.out.println( entry.getGeneName() ); return false; } - n = t3.getNode( "A" ); - if ( !n.getNextExternalNode().getName().equals( "C" ) ) { + if ( !entry.getTaxonomyIdentifier().equals( "273057" ) ) { + System.out.println( entry.getTaxonomyIdentifier() ); return false; } - n = n.getNextExternalNode(); - if ( !n.getNextExternalNode().getName().equals( "D" ) ) { + if ( !entry.getAnnotations().first().getRefValue().equals( "3.2.1.33" ) ) { + System.out.println( entry.getAnnotations().first().getRefValue() ); return false; } - t3.deleteSubtree( t3.getNode( "A" ), true ); - if ( t3.getNumberOfExternalNodes() != 2 ) { + if ( !entry.getAnnotations().first().getRefSource().equals( "EC" ) ) { + System.out.println( entry.getAnnotations().first().getRefSource() ); return false; } - n = t3.getNode( "C" ); - if ( !n.getNextExternalNode().getName().equals( "D" ) ) { + if ( entry.getCrossReferences().size() != 5 ) { return false; } - t3.deleteSubtree( t3.getNode( "C" ), true ); - if ( t3.getNumberOfExternalNodes() != 1 ) { + final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" ); + if ( !entry1.getAccession().equals( "ABJ16409" ) ) { return false; } - t3.deleteSubtree( t3.getNode( "D" ), true ); - if ( t3.getNumberOfExternalNodes() != 0 ) { + if ( !entry1.getTaxonomyScientificName().equals( "Felis catus" ) ) { + System.out.println( entry1.getTaxonomyScientificName() ); return false; } - final Phylogeny t4 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - if ( t4.getNumberOfExternalNodes() != 6 ) { + if ( !entry1.getSequenceName().equals( "Felis catus (domestic cat) partial BCL2" ) ) { + System.out.println( entry1.getSequenceName() ); return false; } - t4.deleteSubtree( t4.getNode( "B2" ), true ); - if ( t4.getNumberOfExternalNodes() != 5 ) { + if ( !entry1.getTaxonomyIdentifier().equals( "9685" ) ) { + System.out.println( entry1.getTaxonomyIdentifier() ); return false; } - String s = w.toNewHampshire( t4, false, true ).toString(); - if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { + if ( !entry1.getGeneName().equals( "BCL2" ) ) { + System.out.println( entry1.getGeneName() ); return false; } - t4.deleteSubtree( t4.getNode( "B11" ), true ); - if ( t4.getNumberOfExternalNodes() != 4 ) { + if ( entry1.getCrossReferences().size() != 6 ) { return false; } - t4.deleteSubtree( t4.getNode( "C" ), true ); - if ( t4.getNumberOfExternalNodes() != 3 ) { + final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" ); + if ( !entry2.getAccession().equals( "NM_184234" ) ) { return false; } - n = t4.getNode( "A" ); - n = n.getNextExternalNode(); - if ( !n.getName().equals( "B12" ) ) { + if ( !entry2.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry2.getTaxonomyScientificName() ); return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "D" ) ) { + if ( !entry2.getSequenceName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + System.out.println( entry2.getSequenceName() ); return false; } - s = w.toNewHampshire( t4, false, true ).toString(); - if ( !s.equals( "((A,B12),D);" ) ) { + if ( !entry2.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry2.getTaxonomyIdentifier() ); return false; } - final Phylogeny t5 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - t5.deleteSubtree( t5.getNode( "A" ), true ); - if ( t5.getNumberOfExternalNodes() != 5 ) { + if ( !entry2.getGeneName().equals( "RBM39" ) ) { + System.out.println( entry2.getGeneName() ); return false; } - s = w.toNewHampshire( t5, false, true ).toString(); - if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) { + if ( entry2.getCrossReferences().size() != 3 ) { return false; } - final Phylogeny t6 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - t6.deleteSubtree( t6.getNode( "B11" ), true ); - if ( t6.getNumberOfExternalNodes() != 5 ) { + // + final SequenceDatabaseEntry entry3 = SequenceDbWsTools.obtainEntry( "HM043801" ); + if ( !entry3.getAccession().equals( "HM043801" ) ) { return false; } - s = w.toNewHampshire( t6, false, false ).toString(); - if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) { + if ( !entry3.getTaxonomyScientificName().equals( "Bursaphelenchus xylophilus" ) ) { + System.out.println( entry3.getTaxonomyScientificName() ); return false; } - final Phylogeny t7 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - t7.deleteSubtree( t7.getNode( "B12" ), true ); - if ( t7.getNumberOfExternalNodes() != 5 ) { + if ( !entry3.getSequenceName().equals( "Bursaphelenchus xylophilus RAF gene, complete cds" ) ) { + System.out.println( entry3.getSequenceName() ); return false; } - s = w.toNewHampshire( t7, false, true ).toString(); - if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) { + if ( !entry3.getTaxonomyIdentifier().equals( "6326" ) ) { + System.out.println( entry3.getTaxonomyIdentifier() ); return false; } - final Phylogeny t8 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - t8.deleteSubtree( t8.getNode( "B2" ), true ); - if ( t8.getNumberOfExternalNodes() != 5 ) { + if ( !entry3.getSequenceSymbol().equals( "RAF" ) ) { + System.out.println( entry3.getSequenceSymbol() ); return false; } - s = w.toNewHampshire( t8, false, false ).toString(); - if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { + if ( !ForesterUtil.isEmpty( entry3.getGeneName() ) ) { return false; } - final Phylogeny t9 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - t9.deleteSubtree( t9.getNode( "C" ), true ); - if ( t9.getNumberOfExternalNodes() != 5 ) { + if ( entry3.getCrossReferences().size() != 8 ) { return false; } - s = w.toNewHampshire( t9, false, true ).toString(); - if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) { + final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" ); + if ( !entry4.getAccession().equals( "AAA36557" ) ) { return false; } - final Phylogeny t10 = factory.create( "((A,((B11,B12),B2)),(C,D))", new NHXParser() )[ 0 ]; - t10.deleteSubtree( t10.getNode( "D" ), true ); - if ( t10.getNumberOfExternalNodes() != 5 ) { + if ( !entry4.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry4.getTaxonomyScientificName() ); return false; } - s = w.toNewHampshire( t10, false, true ).toString(); - if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) { + if ( !entry4.getSequenceName().equals( "Homo sapiens (human) ras protein" ) ) { + System.out.println( entry4.getSequenceName() ); return false; } - final Phylogeny t11 = factory.create( "(A,B,C)", new NHXParser() )[ 0 ]; - t11.deleteSubtree( t11.getNode( "A" ), true ); - if ( t11.getNumberOfExternalNodes() != 2 ) { + if ( !entry4.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry4.getTaxonomyIdentifier() ); return false; } - s = w.toNewHampshire( t11, false, true ).toString(); - if ( !s.equals( "(B,C);" ) ) { + if ( !entry4.getGeneName().equals( "ras" ) ) { + System.out.println( entry4.getGeneName() ); return false; } - t11.deleteSubtree( t11.getNode( "C" ), true ); - if ( t11.getNumberOfExternalNodes() != 1 ) { + // if ( !entry4.getChromosome().equals( "ras" ) ) { + // System.out.println( entry4.getChromosome() ); + // return false; + // } + // if ( !entry4.getMap().equals( "ras" ) ) { + // System.out.println( entry4.getMap() ); + // return false; + // } + //TODO FIXME gi... + // + //TODO fails: + // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); + // if ( !entry5.getAccession().equals( "HM043801" ) ) { + // return false; + // } + final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "AAZ45343.1" ); + if ( !entry5.getAccession().equals( "AAZ45343" ) ) { return false; } - s = w.toNewHampshire( t11, false, false ).toString(); - if ( !s.equals( "B;" ) ) { + if ( !entry5.getTaxonomyScientificName().equals( "Dechloromonas aromatica RCB" ) ) { + System.out.println( entry5.getTaxonomyScientificName() ); return false; } - final Phylogeny t12 = factory.create( "((A1,A2,A3),(B1,B2,B3),(C1,C2,C3))", new NHXParser() )[ 0 ]; - t12.deleteSubtree( t12.getNode( "B2" ), true ); - if ( t12.getNumberOfExternalNodes() != 8 ) { + if ( !entry5.getSequenceName().equals( "Dechloromonas aromatica RCB 1,4-alpha-glucan branching enzyme" ) ) { + System.out.println( entry5.getSequenceName() ); return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) { + if ( !entry5.getTaxonomyIdentifier().equals( "159087" ) ) { + System.out.println( entry5.getTaxonomyIdentifier() ); return false; } - t12.deleteSubtree( t12.getNode( "B3" ), true ); - if ( t12.getNumberOfExternalNodes() != 7 ) { + } + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); + e.printStackTrace( System.out ); + return true; + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testExternalNodeRelatedMethods() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t1 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; + PhylogenyNode n = t1.getNode( "A" ); + n = n.getNextExternalNode(); + if ( !n.getName().equals( "B" ) ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "C" ) ) { return false; } - t12.deleteSubtree( t12.getNode( "C3" ), true ); - if ( t12.getNumberOfExternalNodes() != 6 ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "D" ) ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) { - return false; + n = t1.getNode( "B" ); + while ( !n.isLastExternalNode() ) { + n = n.getNextExternalNode(); } - t12.deleteSubtree( t12.getNode( "A1" ), true ); - if ( t12.getNumberOfExternalNodes() != 5 ) { + final Phylogeny t2 = factory.create( "(((A,B),C),D)", new NHXParser() )[ 0 ]; + n = t2.getNode( "A" ); + n = n.getNextExternalNode(); + if ( !n.getName().equals( "B" ) ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "C" ) ) { return false; } - t12.deleteSubtree( t12.getNode( "B1" ), true ); - if ( t12.getNumberOfExternalNodes() != 4 ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "D" ) ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "((A2,A3),(C1,C2));" ) ) { - return false; + n = t2.getNode( "B" ); + while ( !n.isLastExternalNode() ) { + n = n.getNextExternalNode(); } - t12.deleteSubtree( t12.getNode( "A3" ), true ); - if ( t12.getNumberOfExternalNodes() != 3 ) { + final Phylogeny t3 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ]; + n = t3.getNode( "A" ); + n = n.getNextExternalNode(); + if ( !n.getName().equals( "B" ) ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "(A2,(C1,C2));" ) ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "C" ) ) { return false; } - t12.deleteSubtree( t12.getNode( "A2" ), true ); - if ( t12.getNumberOfExternalNodes() != 2 ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "D" ) ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); - if ( !s.equals( "(C1,C2);" ) ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "E" ) ) { return false; } - final Phylogeny t13 = factory.create( "(A,B,C,(D:1.0,E:2.0):3.0)", new NHXParser() )[ 0 ]; - t13.deleteSubtree( t13.getNode( "D" ), true ); - if ( t13.getNumberOfExternalNodes() != 4 ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "F" ) ) { return false; } - s = w.toNewHampshire( t13, false, true ).toString(); - if ( !s.equals( "(A,B,C,E:5.0);" ) ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "G" ) ) { return false; } - final Phylogeny t14 = factory.create( "((A,B,C,(D:0.1,E:0.4):1.0),F)", new NHXParser() )[ 0 ]; - t14.deleteSubtree( t14.getNode( "E" ), true ); - if ( t14.getNumberOfExternalNodes() != 5 ) { + n = n.getNextExternalNode(); + if ( !n.getName().equals( "H" ) ) { return false; } - s = w.toNewHampshire( t14, false, true ).toString(); - if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) { + n = t3.getNode( "B" ); + while ( !n.isLastExternalNode() ) { + n = n.getNextExternalNode(); + } + final Phylogeny t4 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; + for( final PhylogenyNodeIterator iter = t4.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + } + final Phylogeny t5 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ]; + for( final PhylogenyNodeIterator iter = t5.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + } + final Phylogeny t6 = factory.create( "((((((A))),(((B))),((C)),((((D)))),E)),((F)))", new NHXParser() )[ 0 ]; + final PhylogenyNodeIterator iter = t6.iteratorExternalForward(); + if ( !iter.next().getName().equals( "A" ) ) { return false; } - final Phylogeny t15 = factory.create( "((A1,A2,A3,A4),(B1,B2,B3,B4),(C1,C2,C3,C4))", new NHXParser() )[ 0 ]; - t15.deleteSubtree( t15.getNode( "B2" ), true ); - if ( t15.getNumberOfExternalNodes() != 11 ) { + if ( !iter.next().getName().equals( "B" ) ) { return false; } - t15.deleteSubtree( t15.getNode( "B1" ), true ); - if ( t15.getNumberOfExternalNodes() != 10 ) { + if ( !iter.next().getName().equals( "C" ) ) { return false; } - t15.deleteSubtree( t15.getNode( "B3" ), true ); - if ( t15.getNumberOfExternalNodes() != 9 ) { + if ( !iter.next().getName().equals( "D" ) ) { return false; } - t15.deleteSubtree( t15.getNode( "B4" ), true ); - if ( t15.getNumberOfExternalNodes() != 8 ) { + if ( !iter.next().getName().equals( "E" ) ) { return false; } - t15.deleteSubtree( t15.getNode( "A1" ), true ); - if ( t15.getNumberOfExternalNodes() != 7 ) { + if ( !iter.next().getName().equals( "F" ) ) { return false; } - t15.deleteSubtree( t15.getNode( "C4" ), true ); - if ( t15.getNumberOfExternalNodes() != 6 ) { + if ( iter.hasNext() ) { return false; } } @@ -3504,364 +4266,238 @@ public final class Test { return true; } - private static boolean testDescriptiveStatistics() { + private static boolean testExtractSNFromNodeName() { try { - final DescriptiveStatistics dss1 = new BasicDescriptiveStatistics(); - dss1.addValue( 82 ); - dss1.addValue( 78 ); - dss1.addValue( 70 ); - dss1.addValue( 58 ); - dss1.addValue( 42 ); - if ( dss1.getN() != 5 ) { - return false; - } - if ( !Test.isEqual( dss1.getMin(), 42 ) ) { - return false; - } - if ( !Test.isEqual( dss1.getMax(), 82 ) ) { - return false; - } - if ( !Test.isEqual( dss1.arithmeticMean(), 66 ) ) { - return false; - } - if ( !Test.isEqual( dss1.sampleStandardDeviation(), 16.24807680927192 ) ) { - return false; - } - if ( !Test.isEqual( dss1.median(), 70 ) ) { - return false; - } - if ( !Test.isEqual( dss1.midrange(), 62 ) ) { - return false; - } - if ( !Test.isEqual( dss1.sampleVariance(), 264 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } - if ( !Test.isEqual( dss1.pearsonianSkewness(), -0.7385489458759964 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus" ).equals( "Mus musculus" ) ) { return false; } - if ( !Test.isEqual( dss1.coefficientOfVariation(), 0.24618298195866547 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCDO2" ).equals( "Mus musculus" ) ) { return false; } - if ( !Test.isEqual( dss1.sampleStandardUnit( 66 - 16.24807680927192 ), -1.0 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus musculus BCDO2" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( !Test.isEqual( dss1.getValue( 1 ), 78 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_BCDO2" ) + .equals( "Mus musculus musculus" ) ) { return false; } - dss1.addValue( 123 ); - if ( !Test.isEqual( dss1.arithmeticMean(), 75.5 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus musculus" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( !Test.isEqual( dss1.getMax(), 123 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Bcl Mus musculus musculus" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( !Test.isEqual( dss1.standardErrorOfMean(), 11.200446419674531 ) ) { + if ( ParserUtils.extractScientificNameFromNodeName( "vcl Mus musculus musculus" ) != null ) { return false; } - final DescriptiveStatistics dss2 = new BasicDescriptiveStatistics(); - dss2.addValue( -1.85 ); - dss2.addValue( 57.5 ); - dss2.addValue( 92.78 ); - dss2.addValue( 57.78 ); - if ( !Test.isEqual( dss2.median(), 57.64 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_BCDO2" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( !Test.isEqual( dss2.sampleStandardDeviation(), 39.266984753946495 ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_Musculus" ) + .equals( "Mus musculus musculus" ) ) { return false; } - final double[] a = dss2.getDataAsDoubleArray(); - if ( !Test.isEqual( a[ 3 ], 57.78 ) ) { + if ( ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_musculus" ) != null ) { return false; } - dss2.addValue( -100 ); - if ( !Test.isEqual( dss2.sampleStandardDeviation(), 75.829111296388 ) ) { + if ( ParserUtils.extractScientificNameFromNodeName( "musculus" ) != null ) { return false; } - if ( !Test.isEqual( dss2.sampleVariance(), 5750.05412 ) ) { + if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus" ) != null ) { return false; } - final double[] ds = new double[ 14 ]; - ds[ 0 ] = 34; - ds[ 1 ] = 23; - ds[ 2 ] = 1; - ds[ 3 ] = 32; - ds[ 4 ] = 11; - ds[ 5 ] = 2; - ds[ 6 ] = 12; - ds[ 7 ] = 33; - ds[ 8 ] = 13; - ds[ 9 ] = 22; - ds[ 10 ] = 21; - ds[ 11 ] = 35; - ds[ 12 ] = 24; - ds[ 13 ] = 31; - final int[] bins = BasicDescriptiveStatistics.performBinning( ds, 0, 40, 4 ); - if ( bins.length != 4 ) { + if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus_musculus" ) != null ) { return false; } - if ( bins[ 0 ] != 2 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_1" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( bins[ 1 ] != 3 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_1" ).equals( "Mus musculus" ) ) { return false; } - if ( bins[ 2 ] != 4 ) { + if ( ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_bcl" ) != null ) { return false; } - if ( bins[ 3 ] != 5 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCL" ).equals( "Mus musculus" ) ) { return false; } - final double[] ds1 = new double[ 9 ]; - ds1[ 0 ] = 10.0; - ds1[ 1 ] = 19.0; - ds1[ 2 ] = 9.999; - ds1[ 3 ] = 0.0; - ds1[ 4 ] = 39.9; - ds1[ 5 ] = 39.999; - ds1[ 6 ] = 30.0; - ds1[ 7 ] = 19.999; - ds1[ 8 ] = 30.1; - final int[] bins1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 4 ); - if ( bins1.length != 4 ) { + if ( ParserUtils.extractScientificNameFromNodeName( "Mus musculus bcl" ) != null ) { return false; } - if ( bins1[ 0 ] != 2 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus BCL" ).equals( "Mus musculus" ) ) { return false; } - if ( bins1[ 1 ] != 3 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus xBCL" ).equals( "Mus musculus" ) ) { return false; } - if ( bins1[ 2 ] != 0 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus x1" ).equals( "Mus musculus" ) ) { return false; } - if ( bins1[ 3 ] != 4 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus_12" ).equals( "Mus musculus" ) ) { return false; } - final int[] bins1_1 = BasicDescriptiveStatistics.performBinning( ds1, 0, 40, 3 ); - if ( bins1_1.length != 3 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12 affrre e" ) + .equals( "Mus musculus" ) ) { return false; } - if ( bins1_1[ 0 ] != 3 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12_affrre_e" ) + .equals( "Mus musculus" ) ) { return false; } - if ( bins1_1[ 1 ] != 2 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } - if ( bins1_1[ 2 ] != 4 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) + .equals( "Mus musculus musculus" ) ) { return false; } - final int[] bins1_2 = BasicDescriptiveStatistics.performBinning( ds1, 1, 39, 3 ); - if ( bins1_2.length != 3 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( bins1_2[ 0 ] != 2 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_bcl2" ) + .equals( "Mus musculus musculus" ) ) { return false; } - if ( bins1_2[ 1 ] != 2 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) { return false; } - if ( bins1_2[ 2 ] != 2 ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ) + .equals( "Pilostyles mexicana" ) ) { return false; } - final DescriptiveStatistics dss3 = new BasicDescriptiveStatistics(); - dss3.addValue( 1 ); - dss3.addValue( 1 ); - dss3.addValue( 1 ); - dss3.addValue( 2 ); - dss3.addValue( 3 ); - dss3.addValue( 4 ); - dss3.addValue( 5 ); - dss3.addValue( 5 ); - dss3.addValue( 5 ); - dss3.addValue( 6 ); - dss3.addValue( 7 ); - dss3.addValue( 8 ); - dss3.addValue( 9 ); - dss3.addValue( 10 ); - dss3.addValue( 10 ); - dss3.addValue( 10 ); - final AsciiHistogram histo = new AsciiHistogram( dss3 ); - histo.toStringBuffer( 10, '=', 40, 5 ); - histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null ); - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testDir( final String file ) { - try { - final File f = new File( file ); - if ( !f.exists() ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" ) + .equals( "Escherichia coli strain K12/DH10B" ) ) { return false; } - if ( !f.isDirectory() ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K12/DH10B" ) + .equals( "Escherichia coli str. K12/DH10B" ) ) { return false; } - if ( !f.canRead() ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K12/DH10B" ) + .equals( "Escherichia coli str. K12/DH10B" ) ) { return false; } - } - catch ( final Exception e ) { - return false; - } - return true; - } - - private static boolean testGenbankAccessorParsing() { - //The format for GenBank Accession numbers are: - //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals - //Protein: 3 letters + 5 numerals - //http://www.ncbi.nlm.nih.gov/Sequin/acc.html - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( "AY423861" ) ) { - return false; - } - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) { - return false; - } - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) { - return false; - } - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) { - return false; - } - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) { - return false; - } - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) { - return false; - } - if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) { - return false; - } - if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) { - return false; - } - return true; - } - - private static boolean testExternalNodeRelatedMethods() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t1 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; - PhylogenyNode n = t1.getNode( "A" ); - n = n.getNextExternalNode(); - if ( !n.getName().equals( "B" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis_lyrata_subsp_lyrata" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "C" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "D" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata 395" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } - n = t1.getNode( "B" ); - while ( !n.isLastExternalNode() ) { - n = n.getNextExternalNode(); + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata bcl2" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { + return false; } - final Phylogeny t2 = factory.create( "(((A,B),C),D)", new NHXParser() )[ 0 ]; - n = t2.getNode( "A" ); - n = n.getNextExternalNode(); - if ( !n.getName().equals( "B" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp lyrata bcl2" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "C" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subspecies lyrata bcl2" ) + .equals( "Arabidopsis lyrata subspecies lyrata" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "D" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Verbascum sinuatum var. adenosepalum bcl2" ) + .equals( "Verbascum sinuatum var. adenosepalum" ) ) { return false; } - n = t2.getNode( "B" ); - while ( !n.isLastExternalNode() ) { - n = n.getNextExternalNode(); + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (strain K12)" ) + .equals( "Escherichia coli (strain K12)" ) ) { + return false; } - final Phylogeny t3 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ]; - n = t3.getNode( "A" ); - n = n.getNextExternalNode(); - if ( !n.getName().equals( "B" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (strain K12) bcl2" ) + .equals( "Escherichia coli (strain K12)" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "C" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12)" ) + .equals( "Escherichia coli (str. K12)" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "D" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str K12)" ) + .equals( "Escherichia coli (str. K12)" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "E" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12) bcl2" ) + .equals( "Escherichia coli (str. K12)" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "F" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (var K12) bcl2" ) + .equals( "Escherichia coli (var. K12)" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "G" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K-12 substr. MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } - n = n.getNextExternalNode(); - if ( !n.getName().equals( "H" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } - n = t3.getNode( "B" ); - while ( !n.isLastExternalNode() ) { - n = n.getNextExternalNode(); + if ( !ParserUtils + .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; } - final Phylogeny t4 = factory.create( "((A,B),(C,D))", new NHXParser() )[ 0 ]; - for( final PhylogenyNodeIterator iter = t4.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star gene1" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; } - final Phylogeny t5 = factory.create( "(((A,B),(C,D)),((E,F),(G,H)))", new NHXParser() )[ 0 ]; - for( final PhylogenyNodeIterator iter = t5.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); + if ( !ParserUtils + .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star GENE1" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; } - final Phylogeny t6 = factory.create( "((((((A))),(((B))),((C)),((((D)))),E)),((F)))", new NHXParser() )[ 0 ]; - final PhylogenyNodeIterator iter = t6.iteratorExternalForward(); - if ( !iter.next().getName().equals( "A" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } - if ( !iter.next().getName().equals( "B" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { return false; } - if ( !iter.next().getName().equals( "C" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ).equals( "Macrocera sp." ) ) { return false; } - if ( !iter.next().getName().equals( "D" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ).equals( "Macrocera sp." ) ) { return false; } - if ( !iter.next().getName().equals( "E" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ).equals( "Macrocera sp." ) ) { return false; } - if ( !iter.next().getName().equals( "F" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" ) + .equals( "Macrocera sp." ) ) { return false; } - if ( iter.hasNext() ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum ssp merenskyanum 07 48" ) + .equals( "Sesamum rigidum subsp. merenskyanum" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum ssp. merenskyanum" ) + .equals( "Sesamum rigidum subsp. merenskyanum" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum (ssp. merenskyanum)" ) + .equals( "Sesamum rigidum (subsp. merenskyanum)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum (ssp merenskyanum)" ) + .equals( "Sesamum rigidum (subsp. merenskyanum)" ) ) { return false; } } @@ -3872,24 +4508,34 @@ public final class Test { return true; } - private static boolean testExtractSNFromNodeName() { + private static boolean testExtractTaxonomyDataFromNodeName() { try { - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) { + PhylogenyNode n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus" ) - .equals( "Mus musculus musculus" ) ) { + n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN~1-2" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus-12" ) - .equals( "Mus musculus musculus" ) ) { + n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus-12" ).equals( "Mus musculus" ) ) { + n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN|" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus-12 affrre e" ) - .equals( "Mus musculus" ) ) { + n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN~12" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "HNRPR_HUMAN" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "HNRPR_HUMAN_X" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { return false; } } @@ -4236,7 +4882,7 @@ public final class Test { if ( !msa_0.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "DKXASDFXSFXFKFKSXDFKSLX" ) ) { return false; } - if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPRXWXERR" ) ) { + if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPROWXERR" ) ) { return false; } if ( !msa_0.getSequenceAsString( 3 ).toString().equalsIgnoreCase( "AAAAAAAAAAAAAAAAAAAAAAA" ) ) { @@ -4253,6 +4899,56 @@ public final class Test { return true; } + private static boolean testGenbankAccessorParsing() { + //The format for GenBank Accession numbers are: + //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals + //Protein: 3 letters + 5 numerals + //http://www.ncbi.nlm.nih.gov/Sequin/acc.html + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( "AY423861" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) { + return false; + } + return true; + } + private static boolean testGeneralMsaParser() { try { final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n"; @@ -4776,14 +5472,15 @@ public final class Test { private static boolean testGetLCA2() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; + // final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; + final Phylogeny p_a = NHXParser.parse( "(a)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_a ); final PhylogenyNode p_a_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_a.getNode( "a" ), p_a.getNode( "a" ) ); if ( !p_a_1.getName().equals( "a" ) ) { return false; } - final Phylogeny p_b = factory.create( "((a)b)", new NHXParser() )[ 0 ]; + final Phylogeny p_b = NHXParser.parse( "((a)b)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_b ); final PhylogenyNode p_b_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_b.getNode( "b" ), p_b.getNode( "a" ) ); @@ -5473,11 +6170,11 @@ public final class Test { private static boolean testMsaQualityMethod() { try { - final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJ" ); - final Sequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJ" ); - final Sequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJ" ); - final Sequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ" ); - final List l = new ArrayList(); + final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJJE-" ); + final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJJBB" ); + final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJJ--" ); + final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ---" ); + final List l = new ArrayList(); l.add( s0 ); l.add( s1 ); l.add( s2 ); @@ -5495,6 +6192,236 @@ public final class Test { if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 3 ) ) ) { return false; } + if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 10 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 11 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 12 ) ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testMsaEntropy() { + try { + final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAAAAA" ); + final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "AAAIACC" ); + final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AAIIIIF" ); + final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AIIIVVW" ); + final List l = new ArrayList(); + l.add( s0 ); + l.add( s1 ); + l.add( s2 ); + l.add( s3 ); + final Msa msa = BasicMsa.createInstance( l ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 0 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 1 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 2 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 3 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 4 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 5 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 6 ) ); + System.out.println(); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 0 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 1 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 2 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 3 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 4 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 5 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 6 ) ); + final List l2 = new ArrayList(); + l2.add( BasicSequence.createAaSequence( "1", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "2", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "3", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "4", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "5", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "6", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "7", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "8", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "9", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "10", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "11", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "12", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "13", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "14", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "15", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "16", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "17", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "18", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "19", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "20", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "21", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "22", "AIIIVVW" ) ); + final Msa msa2 = BasicMsa.createInstance( l2 ); + System.out.println(); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 0 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 1 ) ); + System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 2 ) ); + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testDeleteableMsa() { + try { + final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAA" ); + final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "BAAA" ); + final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "CAAA" ); + final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "DAAA" ); + final MolecularSequence s4 = BasicSequence.createAaSequence( "e", "EAAA" ); + final MolecularSequence s5 = BasicSequence.createAaSequence( "f", "FAAA" ); + final List l0 = new ArrayList(); + l0.add( s0 ); + l0.add( s1 ); + l0.add( s2 ); + l0.add( s3 ); + l0.add( s4 ); + l0.add( s5 ); + final DeleteableMsa dmsa0 = DeleteableMsa.createInstance( l0 ); + dmsa0.deleteRow( "b", false ); + if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) { + return false; + } + dmsa0.deleteRow( "e", false ); + dmsa0.deleteRow( "a", false ); + dmsa0.deleteRow( "f", false ); + if ( dmsa0.getLength() != 4 ) { + return false; + } + if ( dmsa0.getNumberOfSequences() != 2 ) { + return false; + } + if ( !dmsa0.getIdentifier( 0 ).equals( "c" ) ) { + return false; + } + if ( !dmsa0.getIdentifier( 1 ).equals( "d" ) ) { + return false; + } + if ( dmsa0.getResidueAt( 0, 0 ) != 'C' ) { + return false; + } + if ( !dmsa0.getSequenceAsString( 0 ).toString().equals( "CAAA" ) ) { + return false; + } + if ( dmsa0.getColumnAt( 0 ).size() != 2 ) { + return false; + } + dmsa0.deleteRow( "c", false ); + dmsa0.deleteRow( "d", false ); + if ( dmsa0.getNumberOfSequences() != 0 ) { + return false; + } + // + final MolecularSequence s_0 = BasicSequence.createAaSequence( "a", "--A---B-C--X----" ); + final MolecularSequence s_1 = BasicSequence.createAaSequence( "b", "--B-----C-------" ); + final MolecularSequence s_2 = BasicSequence.createAaSequence( "c", "--C--AB-C------Z" ); + final MolecularSequence s_3 = BasicSequence.createAaSequence( "d", "--D--AA-C-------" ); + final MolecularSequence s_4 = BasicSequence.createAaSequence( "e", "--E--AA-C-------" ); + final MolecularSequence s_5 = BasicSequence.createAaSequence( "f", "--F--AB-CD--Y---" ); + final List l1 = new ArrayList(); + l1.add( s_0 ); + l1.add( s_1 ); + l1.add( s_2 ); + l1.add( s_3 ); + l1.add( s_4 ); + l1.add( s_5 ); + final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 ); + dmsa1.deleteGapOnlyColumns(); + dmsa1.deleteRow( "a", false ); + dmsa1.deleteRow( "f", false ); + dmsa1.deleteRow( "d", false ); + dmsa1.deleteGapOnlyColumns(); + if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) { + return false; + } + if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "CABCZ" ) ) { + return false; + } + if ( !dmsa1.getSequenceAsString( 2 ).toString().equals( "EAAC-" ) ) { + return false; + } + dmsa1.deleteRow( "c", false ); + dmsa1.deleteGapOnlyColumns(); + final Writer w0 = new StringWriter(); + dmsa1.write( w0, MSA_FORMAT.FASTA ); + final Writer w1 = new StringWriter(); + dmsa1.write( w1, MSA_FORMAT.PHYLIP ); + if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C" ) ) { + return false; + } + if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "EAAC" ) ) { + return false; + } + final MolecularSequence s__0 = BasicSequence.createAaSequence( "a", "A------" ); + final MolecularSequence s__1 = BasicSequence.createAaSequence( "b", "BB-----" ); + final MolecularSequence s__2 = BasicSequence.createAaSequence( "c", "CCC----" ); + final MolecularSequence s__3 = BasicSequence.createAaSequence( "d", "DDDD---" ); + final MolecularSequence s__4 = BasicSequence.createAaSequence( "e", "EEEEE--" ); + final MolecularSequence s__5 = BasicSequence.createAaSequence( "f", "FFFFFF-" ); + final List l2 = new ArrayList(); + l2.add( s__0 ); + l2.add( s__1 ); + l2.add( s__2 ); + l2.add( s__3 ); + l2.add( s__4 ); + l2.add( s__5 ); + final DeleteableMsa dmsa2 = DeleteableMsa.createInstance( l2 ); + dmsa2.deleteGapColumns( 0.5 ); + if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A---" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB--" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CCC-" ) ) { + return false; + } + dmsa2.deleteGapColumns( 0.2 ); + if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A-" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CC" ) ) { + return false; + } + dmsa2.deleteGapColumns( 0 ); + dmsa2.deleteRow( "a", false ); + dmsa2.deleteRow( "b", false ); + dmsa2.deleteRow( "f", false ); + dmsa2.deleteRow( "e", false ); + dmsa2.setIdentifier( 0, "new_c" ); + dmsa2.setIdentifier( 1, "new_d" ); + dmsa2.setResidueAt( 0, 0, 'x' ); + final MolecularSequence s = dmsa2.deleteRow( "new_d", true ); + if ( !s.getMolecularSequenceAsString().equals( "D" ) ) { + return false; + } + final Writer w = new StringWriter(); + dmsa2.write( w, MSA_FORMAT.PHYLIP ); + final String phylip = w.toString(); + if ( !phylip.equals( "1 1" + ForesterUtil.LINE_SEPARATOR + "new_c x" + ForesterUtil.LINE_SEPARATOR ) ) { + System.out.println( phylip ); + return false; + } + final Writer w2 = new StringWriter(); + dmsa2.write( w2, MSA_FORMAT.FASTA ); + final String fasta = w2.toString(); + if ( !fasta.equals( ">new_c" + ForesterUtil.LINE_SEPARATOR + "x" + ForesterUtil.LINE_SEPARATOR ) ) { + System.out.println( fasta ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -5562,8 +6489,6 @@ public final class Test { if ( !ext.get( 4 ).getName().equals( "h" ) ) { return false; } - // - // ext.clear(); final StringBuffer sb2 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t2 = factory.create( sb2, new NHXParser() )[ 0 ]; @@ -5592,8 +6517,6 @@ public final class Test { if ( !ext.get( 3 ).getName().equals( "gh" ) ) { return false; } - // - // ext.clear(); final StringBuffer sb3 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t3 = factory.create( sb3, new NHXParser() )[ 0 ]; @@ -5620,8 +6543,6 @@ public final class Test { if ( !ext.get( 2 ).getName().equals( "fgh" ) ) { return false; } - // - // ext.clear(); final StringBuffer sb4 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t4 = factory.create( sb4, new NHXParser() )[ 0 ]; @@ -5638,8 +6559,6 @@ public final class Test { if ( n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes() != null ) { return false; } - // - // final StringBuffer sb5 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; ext.clear(); @@ -5675,8 +6594,6 @@ public final class Test { if ( !ext.get( 7 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb6 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; ext.clear(); @@ -5710,8 +6627,6 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb7 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; ext.clear(); @@ -5745,8 +6660,6 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb8 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; ext.clear(); @@ -5783,8 +6696,6 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb9 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t9 = factory.create( sb9, new NHXParser() )[ 0 ]; ext.clear(); @@ -5818,8 +6729,6 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "gh" ) ) { return false; } - // - // final StringBuffer sb10 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t10 = factory.create( sb10, new NHXParser() )[ 0 ]; ext.clear(); @@ -5855,8 +6764,6 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "gh" ) ) { return false; } - // - // final StringBuffer sb11 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t11 = factory.create( sb11, new NHXParser() )[ 0 ]; ext.clear(); @@ -5888,8 +6795,6 @@ public final class Test { if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb12 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t12 = factory.create( sb12, new NHXParser() )[ 0 ]; ext.clear(); @@ -5924,8 +6829,6 @@ public final class Test { if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb13 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); final Phylogeny t13 = factory.create( sb13, new NHXParser() )[ 0 ]; ext.clear(); @@ -5956,8 +6859,6 @@ public final class Test { if ( !ext.get( 4 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb14 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); final Phylogeny t14 = factory.create( sb14, new NHXParser() )[ 0 ]; ext.clear(); @@ -5988,8 +6889,6 @@ public final class Test { if ( !ext.get( 4 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb15 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); final Phylogeny t15 = factory.create( sb15, new NHXParser() )[ 0 ]; ext.clear(); @@ -6352,37 +7251,66 @@ public final class Test { if ( !phylogenies[ 14 ].getName().equals( "tree 14" ) ) { return false; } - if ( !phylogenies[ 14 ].isRooted() ) { + if ( !phylogenies[ 14 ].isRooted() ) { + return false; + } + if ( phylogenies[ 14 ].getNumberOfExternalNodes() != 10 ) { + return false; + } + if ( !phylogenies[ 15 ].getName().equals( "tree 15" ) ) { + return false; + } + if ( phylogenies[ 15 ].isRooted() ) { + return false; + } + if ( phylogenies[ 15 ].getNumberOfExternalNodes() != 10 ) { + return false; + } + if ( !phylogenies[ 16 ].getName().equals( "tree 16" ) ) { + return false; + } + if ( !phylogenies[ 16 ].isRooted() ) { + return false; + } + if ( phylogenies[ 16 ].getNumberOfExternalNodes() != 10 ) { + return false; + } + if ( !phylogenies[ 17 ].getName().equals( "tree 17" ) ) { return false; } - if ( phylogenies[ 14 ].getNumberOfExternalNodes() != 10 ) { + if ( phylogenies[ 17 ].isRooted() ) { return false; } - if ( !phylogenies[ 15 ].getName().equals( "tree 15" ) ) { + if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) { return false; } - if ( phylogenies[ 15 ].isRooted() ) { + final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser(); + phylogenies = null; + phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S15613.nex", p2 ); + if ( phylogenies.length != 9 ) { return false; } - if ( phylogenies[ 15 ].getNumberOfExternalNodes() != 10 ) { + if ( !isEqual( 0.48039661496919533, phylogenies[ 0 ].getNode( "Diadocidia_spinosula" ) + .getDistanceToParent() ) ) { return false; } - if ( !phylogenies[ 16 ].getName().equals( "tree 16" ) ) { + if ( !isEqual( 0.3959796191512233, phylogenies[ 0 ].getNode( "Diadocidia_stanfordensis" ) + .getDistanceToParent() ) ) { return false; } - if ( !phylogenies[ 16 ].isRooted() ) { + if ( !phylogenies[ 0 ].getName().equals( "Family Diadocidiidae MLT (Imported_tree_0)" ) ) { return false; } - if ( phylogenies[ 16 ].getNumberOfExternalNodes() != 10 ) { + if ( !phylogenies[ 1 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) { return false; } - if ( !phylogenies[ 17 ].getName().equals( "tree 17" ) ) { + if ( !phylogenies[ 2 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) { return false; } - if ( phylogenies[ 17 ].isRooted() ) { + if ( !isEqual( 0.065284, phylogenies[ 7 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { return false; } - if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) { + if ( !isEqual( 0.065284, phylogenies[ 8 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { return false; } } @@ -6417,7 +7345,6 @@ public final class Test { if ( phy != null ) { return false; } - // p.reset(); if ( !p.hasNext() ) { return false; @@ -6439,7 +7366,6 @@ public final class Test { if ( phy != null ) { return false; } - //// p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_2.nex" ); if ( !p.hasNext() ) { return false; @@ -6461,7 +7387,6 @@ public final class Test { if ( phy != null ) { return false; } - // p.reset(); if ( !p.hasNext() ) { return false; @@ -6483,7 +7408,6 @@ public final class Test { if ( phy != null ) { return false; } - //// p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_3.nex" ); if ( !p.hasNext() ) { return false; @@ -6530,15 +7454,12 @@ public final class Test { if ( phy != null ) { return false; } - //// + // p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_4_1.nex" ); - // if ( phylogenies.length != 18 ) { - // return false; - // } - //0 if ( !p.hasNext() ) { return false; } + //0 phy = p.next(); if ( phy == null ) { return false; @@ -6572,6 +7493,7 @@ public final class Test { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { + System.out.println( phy.toString() ); return false; } if ( !phy.getName().equals( "" ) ) { @@ -6949,6 +7871,82 @@ public final class Test { if ( phy.isRooted() ) { return false; } + // + final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser(); + p2.setSource( Test.PATH_TO_TEST_DATA + "S15613.nex" ); + // 0 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { + return false; + } + if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { + return false; + } + // 1 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 2 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 3 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 4 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 5 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 6 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 7 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 8 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( !isEqual( 0.065284, phy.getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { + return false; + } + if ( p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( phy != null ) { + return false; + } + // 0 + p2.reset(); + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { + return false; + } + if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7105,6 +8103,14 @@ public final class Test { .equals( "Aranaeus" ) ) { return false; } + phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S14117.nex", parser ); + if ( phylogenies.length != 3 ) { + return false; + } + if ( !isEqual( phylogenies[ 2 ].getNode( "Aloysia lycioides 251-76-02169" ).getDistanceToParent(), + 0.00100049 ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7124,10 +8130,10 @@ public final class Test { nhxp.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); nhxp.setReplaceUnderscores( true ); final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ]; - if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) { + if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A" ) ) { return false; } - if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( " B B" ) ) { + if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( "B B" ) ) { return false; } final Phylogeny p1b = factory @@ -7418,14 +8424,14 @@ public final class Test { if ( p50.getNode( "A" ) == null ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) .equals( "((A,B)ab:2.0[88],C);" ) ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) .equals( "((A,B)88:2.0,C);" ) ) { return false; } @@ -7443,13 +8449,63 @@ public final class Test { if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } - // final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; if ( p54.getNode( "A" ) == null ) { return false; } - if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) - .equals( "((A,B)[88],C);" ) ) { + if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) { + return false; + } + final Phylogeny p55 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ), + new NHXParser() )[ 0 ]; + if ( !p55 + .toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,lcl|HPV66_L1.1x:0.0798012);" ) ) { + System.out.println( p55.toNewHampshire() ); + return false; + } + final Phylogeny p56 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), + new NHXParser() )[ 0 ]; + if ( !p56 + .toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { + System.out.println( p56.toNewHampshire() ); + return false; + } + final Phylogeny p57 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ), + new NHXParser() )[ 0 ]; + if ( !p57 + .toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { + System.out.println( p56.toNewHampshire() ); + return false; + } + final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';"; + final Phylogeny p58 = factory.create( new StringBuffer( s58 ), new NHXParser() )[ 0 ]; + if ( !p58.toNewHampshire().equals( s58 ) ) { + System.out.println( p58.toNewHampshire() ); + return false; + } + final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";"; + final Phylogeny p59 = factory.create( new StringBuffer( s59 ), new NHXParser() )[ 0 ]; + if ( !p59.toNewHampshire().equals( s59 ) ) { + System.out.println( p59.toNewHampshire() ); + return false; + } + final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');"; + final Phylogeny p60 = factory.create( new StringBuffer( s60 ), new NHXParser() )[ 0 ]; + if ( !p60.toNewHampshire().equals( s60 ) ) { + System.out.println( p60.toNewHampshire() ); + return false; + } + final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';"; + final Phylogeny p61 = factory.create( new StringBuffer( s61 ), new NHXParser() )[ 0 ]; + if ( !p61.toNewHampshire() + .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) { + System.out.println( p61.toNewHampshire() ); return false; } } @@ -7842,6 +8898,67 @@ public final class Test { if ( p.next() != null ) { return false; } + // + final String p30_str = "(A,B);(C,D)"; + final NHXParser p30 = new NHXParser(); + p30.setSource( p30_str ); + if ( !p30.hasNext() ) { + return false; + } + Phylogeny phy30 = p30.next(); + if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { + System.out.println( phy30.toNewHampshire() ); + return false; + } + if ( !p30.hasNext() ) { + return false; + } + Phylogeny phy301 = p30.next(); + if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { + System.out.println( phy301.toNewHampshire() ); + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + p30.reset(); + if ( !p30.hasNext() ) { + return false; + } + phy30 = p30.next(); + if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { + System.out.println( phy30.toNewHampshire() ); + return false; + } + if ( !p30.hasNext() ) { + return false; + } + phy301 = p30.next(); + if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { + System.out.println( phy301.toNewHampshire() ); + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + if ( p30.next() != null ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7879,6 +8996,14 @@ public final class Test { System.out.println( n6.toNewHampshireX() ); return false; } + final PhylogenyNode n7 = new PhylogenyNode(); + n7.setName( " gks:dr-m4 \" ' `@:[]sadq04 " ); + if ( !n7.toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + .equals( "'gks:dr-m4 \" ` `@:[]sadq04'" ) ) { + System.out.println( n7 + .toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -8126,8 +9251,8 @@ public final class Test { return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n13.getName().equals( "blah_12345/1-2" ) ) { + .createInstanceFromNhxString( "BLAH_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n13.getName().equals( "BLAH_12345/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n13 ).equals( "12345" ) ) { @@ -8192,7 +9317,7 @@ public final class Test { return false; } final PhylogenyNode n19 = PhylogenyNode - .createInstanceFromNhxString( "blah_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAH_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { return false; } @@ -8200,7 +9325,7 @@ public final class Test { return false; } final PhylogenyNode n30 = PhylogenyNode - .createInstanceFromNhxString( "blah_1234567-roejojoej", + .createInstanceFromNhxString( "BLAH_1234567-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) { return false; @@ -8209,7 +9334,7 @@ public final class Test { return false; } final PhylogenyNode n31 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345678-roejojoej", + .createInstanceFromNhxString( "BLAH_12345678-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n31.getNodeData().isHasTaxonomy() ) { return false; @@ -8220,7 +9345,7 @@ public final class Test { return false; } final PhylogenyNode n40 = PhylogenyNode - .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BCL2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { return false; } @@ -8315,6 +9440,12 @@ public final class Test { if ( !p10.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { return false; } + final Phylogeny p11 = factory + .create( " [79] ( ('A: \" ' [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]", + new NHXParser() )[ 0 ]; + if ( !p11.toNewHampshireX().equals( "(('A: \"':0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -8405,13 +9536,13 @@ public final class Test { if ( phy.getNodes( "'single quotes' inside double quotes" ).size() != 1 ) { return false; } - if ( phy.getNodes( "double quotes inside single quotes" ).size() != 1 ) { + if ( phy.getNodes( "\"double quotes\" inside single quotes" ).size() != 1 ) { return false; } if ( phy.getNodes( "noquotes" ).size() != 1 ) { return false; } - if ( phy.getNodes( "A ( B C '" ).size() != 1 ) { + if ( phy.getNodes( "A ( B C '" ).size() != 1 ) { return false; } final NHXParser p1p = new NHXParser(); @@ -8441,7 +9572,7 @@ public final class Test { final Phylogeny p10 = factory .create( " [79] ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool, was! ) ':0.1[100] [comment]", new NHXParser() )[ 0 ]; - final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; + final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; if ( !p10.toNewHampshireX().equals( p10_clean_str ) ) { return false; } @@ -8449,11 +9580,10 @@ public final class Test { if ( !p11.toNewHampshireX().equals( p10_clean_str ) ) { return false; } - // final Phylogeny p12 = factory .create( " [79] ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]", new NHXParser() )[ 0 ]; - final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; + final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; if ( !p12.toNewHampshireX().equals( p12_clean_str ) ) { return false; } @@ -8461,7 +9591,7 @@ public final class Test { if ( !p13.toNewHampshireX().equals( p12_clean_str ) ) { return false; } - final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;"; + final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;"; if ( !p13.toNewHampshire().equals( p12_clean_str_nh ) ) { return false; } @@ -9965,652 +11095,464 @@ public final class Test { if ( sdi_unrooted.getMinimalDuplications() != 3 ) { return false; } - if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) { - return false; - } - if ( !p8[ 0 ].getRoot().isDuplication() ) { - return false; - } - if ( !p8[ 0 ].getNode( "4-5-6" ).isDuplication() ) { - return false; - } - if ( !p8[ 0 ].getNode( "7-8-9" ).isDuplication() ) { - return false; - } - if ( p8[ 0 ].getNode( "1-2" ).isDuplication() ) { - return false; - } - if ( p8[ 0 ].getNode( "1-2-3" ).isDuplication() ) { - return false; - } - if ( p8[ 0 ].getNode( "5-6" ).isDuplication() ) { - return false; - } - if ( p8[ 0 ].getNode( "8-9" ).isDuplication() ) { - return false; - } - if ( p8[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { - return false; - } - p8 = null; - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testSequenceIdParsing() { - try { - Accession id = SequenceAccessionTools.parseAccessorFromString( "gb_ADF31344_segmented_worms_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "segmented worms|gb_ADF31344" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "segmented worms gb_ADF31344 and more" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "gb_AAA96518_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "AAA96518" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "gb_EHB07727_1_rodents_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "EHB07727" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "dbj_BAF37827_1_turtles_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "BAF37827" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "emb_CAA73223_1_primates_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "CAA73223" ) || !id.getSource().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "mites|ref_XP_002434188_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "mites_ref_XP_002434188_1_bla_XP_12345" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - // - id = SequenceAccessionTools.parseAccessorFromString( "P4A123" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) - || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "uniprot" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); - } - return false; - } - id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" ); - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getSource() ); + if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testSequenceWriter() { - try { - final String n = ForesterUtil.LINE_SEPARATOR; - if ( !SequenceWriter.toFasta( "name", "awes", 5 ).toString().equals( ">name" + n + "awes" ) ) { + if ( !p8[ 0 ].getRoot().isDuplication() ) { return false; } - if ( !SequenceWriter.toFasta( "name", "awes", 4 ).toString().equals( ">name" + n + "awes" ) ) { + if ( !p8[ 0 ].getNode( "4-5-6" ).isDuplication() ) { return false; } - if ( !SequenceWriter.toFasta( "name", "awes", 3 ).toString().equals( ">name" + n + "awe" + n + "s" ) ) { + if ( !p8[ 0 ].getNode( "7-8-9" ).isDuplication() ) { return false; } - if ( !SequenceWriter.toFasta( "name", "awes", 2 ).toString().equals( ">name" + n + "aw" + n + "es" ) ) { + if ( p8[ 0 ].getNode( "1-2" ).isDuplication() ) { return false; } - if ( !SequenceWriter.toFasta( "name", "awes", 1 ).toString() - .equals( ">name" + n + "a" + n + "w" + n + "e" + n + "s" ) ) { + if ( p8[ 0 ].getNode( "1-2-3" ).isDuplication() ) { return false; } - if ( !SequenceWriter.toFasta( "name", "abcdefghij", 3 ).toString() - .equals( ">name" + n + "abc" + n + "def" + n + "ghi" + n + "j" ) ) { + if ( p8[ 0 ].getNode( "5-6" ).isDuplication() ) { + return false; + } + if ( p8[ 0 ].getNode( "8-9" ).isDuplication() ) { + return false; + } + if ( p8[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { return false; } + p8 = null; } catch ( final Exception e ) { - e.printStackTrace(); + e.printStackTrace( System.out ); return false; } return true; } - private static boolean testSpecies() { + private static boolean testSequenceDbWsTools1() { try { - final Species s1 = new BasicSpecies( "a" ); - final Species s2 = new BasicSpecies( "a" ); - final Species s3 = new BasicSpecies( "A" ); - final Species s4 = new BasicSpecies( "b" ); - if ( !s1.equals( s1 ) ) { + final PhylogenyNode n = new PhylogenyNode(); + n.setName( "NP_001025424" ); + Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - if ( s1.getSpeciesId().equals( "x" ) ) { + n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - if ( s1.getSpeciesId().equals( null ) ) { + n.setName( "NP_001025424.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - if ( !s1.equals( s2 ) ) { + n.setName( "NM_001030253" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NM_001030253" ) ) { return false; } - if ( s1.equals( s3 ) ) { + n.setName( "BCL2_HUMAN" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "BCL2_HUMAN" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.hashCode() != s1.hashCode() ) { + n.setName( "P10415" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.hashCode() != s2.hashCode() ) { + n.setName( " P10415 " ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.hashCode() == s3.hashCode() ) { + n.setName( "_P10415|" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.compareTo( s1 ) != 0 ) { + n.setName( "AY695820" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.compareTo( s2 ) != 0 ) { + n.setName( "_AY695820_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.compareTo( s3 ) != 0 ) { + n.setName( "AAA59452" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s1.compareTo( s4 ) >= 0 ) { + n.setName( "_AAA59452_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s4.compareTo( s1 ) <= 0 ) { + n.setName( "AAA59452.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); return false; } - if ( !s4.getSpeciesId().equals( "b" ) ) { + n.setName( "_AAA59452.1_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); return false; } - final Species s5 = new BasicSpecies( " C " ); - if ( !s5.getSpeciesId().equals( "C" ) ) { + n.setName( "GI:94894583" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "94894583" ) ) { + System.out.println( acc.toString() ); return false; } - if ( s5.equals( s1 ) ) { + n.setName( "gi|71845847|1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "71845847" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "gi|71845847|gb|AAZ45343.1| 1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAZ45343.1" ) ) { + System.out.println( acc.toString() ); return false; } } catch ( final Exception e ) { - e.printStackTrace( System.out ); return false; } return true; } - private static boolean testSplit() { + private static boolean testSequenceDbWsTools2() { try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ]; - //Archaeopteryx.createApplication( p0 ); - final Set ex = new HashSet(); - ex.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - ex.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, false, ex ); - // System.out.println( s0.toString() ); - // - Set query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - if ( s0.match( query_nodes ) ) { + final PhylogenyNode n1 = new PhylogenyNode( "NP_001025424" ); + SequenceDbWsTools.obtainSeqInformation( n1 ); + if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { return false; } - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n1.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n1.getNodeData().getSequence().getAccession().getValue().equals( "NP_001025424" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( !s0.match( query_nodes ) ) { + final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" ); + SequenceDbWsTools.obtainSeqInformation( n2 ); + if ( !n2.getNodeData().getSequence().getName().equals( "Danio rerio B-cell CLL/lymphoma 2a (bcl2a), mRNA" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n2.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n2.getNodeData().getSequence().getAccession().getValue().equals( "NM_001030253" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - if ( !s0.match( query_nodes ) ) { + final PhylogenyNode n3 = new PhylogenyNode( "NM_184234.2" ); + SequenceDbWsTools.obtainSeqInformation( n3 ); + if ( !n3.getNodeData().getSequence().getName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( !s0.match( query_nodes ) ) { + if ( !n3.getNodeData().getTaxonomy().getScientificName().equals( "Homo sapiens" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - if ( s0.match( query_nodes ) ) { + if ( !n3.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n3.getNodeData().getSequence().getAccession().getValue().equals( "NM_184234" ) ) { + return false; + } + } + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); + e.printStackTrace( System.out ); + return true; + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testSequenceIdParsing() { + try { + Accession id = SequenceAccessionTools.parseAccessorFromString( "gb_ADF31344_segmented_worms_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms|gb_ADF31344" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms gb_ADF31344 and more" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "gb_AAA96518_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "AAA96518" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "gb_EHB07727_1_rodents_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "EHB07727" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "dbj_BAF37827_1_turtles_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "BAF37827" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "emb_CAA73223_1_primates_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "CAA73223" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites|ref_XP_002434188_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites_ref_XP_002434188_1_bla_XP_12345" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "P4A123" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "uniprot" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" ); + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "N3B004Z009" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "N3B004Z009" ) || !id.getSource().equals( "uniprot" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "A4CAA4ZBB9" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "A4CAA4ZBB9" ) || !id.getSource().equals( "uniprot" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "ecoli_A4CAA4ZBB9_rt" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "A4CAA4ZBB9" ) || !id.getSource().equals( "uniprot" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } return false; } - ///////// - // query_nodes = new HashSet(); - // query_nodes.add( new PhylogenyNode( "X" ) ); - // query_nodes.add( new PhylogenyNode( "Y" ) ); - // query_nodes.add( new PhylogenyNode( "A" ) ); - // query_nodes.add( new PhylogenyNode( "B" ) ); - // query_nodes.add( new PhylogenyNode( "C" ) ); - // query_nodes.add( new PhylogenyNode( "D" ) ); - // query_nodes.add( new PhylogenyNode( "E" ) ); - // query_nodes.add( new PhylogenyNode( "F" ) ); - // query_nodes.add( new PhylogenyNode( "G" ) ); - // if ( !s0.match( query_nodes ) ) { - // return false; - // } - // query_nodes = new HashSet(); - // query_nodes.add( new PhylogenyNode( "X" ) ); - // query_nodes.add( new PhylogenyNode( "Y" ) ); - // query_nodes.add( new PhylogenyNode( "A" ) ); - // query_nodes.add( new PhylogenyNode( "B" ) ); - // query_nodes.add( new PhylogenyNode( "C" ) ); - // if ( !s0.match( query_nodes ) ) { - // return false; - // } - // // - // query_nodes = new HashSet(); - // query_nodes.add( new PhylogenyNode( "X" ) ); - // query_nodes.add( new PhylogenyNode( "Y" ) ); - // query_nodes.add( new PhylogenyNode( "D" ) ); - // query_nodes.add( new PhylogenyNode( "E" ) ); - // query_nodes.add( new PhylogenyNode( "F" ) ); - // query_nodes.add( new PhylogenyNode( "G" ) ); - // if ( !s0.match( query_nodes ) ) { - // return false; - // } - // // - // query_nodes = new HashSet(); - // query_nodes.add( new PhylogenyNode( "X" ) ); - // query_nodes.add( new PhylogenyNode( "Y" ) ); - // query_nodes.add( new PhylogenyNode( "A" ) ); - // query_nodes.add( new PhylogenyNode( "B" ) ); - // query_nodes.add( new PhylogenyNode( "C" ) ); - // query_nodes.add( new PhylogenyNode( "D" ) ); - // if ( !s0.match( query_nodes ) ) { - // return false; - // } - // // - // query_nodes = new HashSet(); - // query_nodes.add( new PhylogenyNode( "X" ) ); - // query_nodes.add( new PhylogenyNode( "Y" ) ); - // query_nodes.add( new PhylogenyNode( "E" ) ); - // query_nodes.add( new PhylogenyNode( "F" ) ); - // query_nodes.add( new PhylogenyNode( "G" ) ); - // if ( !s0.match( query_nodes ) ) { - // return false; - // } - // // - // query_nodes = new HashSet(); - // query_nodes.add( new PhylogenyNode( "X" ) ); - // query_nodes.add( new PhylogenyNode( "Y" ) ); - // query_nodes.add( new PhylogenyNode( "F" ) ); - // query_nodes.add( new PhylogenyNode( "G" ) ); - // if ( !s0.match( query_nodes ) ) { - // return false; - // } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "Q4CAA4ZBB9" ); + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - if ( s0.match( query_nodes ) ) { + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testSequenceWriter() { + try { + final String n = ForesterUtil.LINE_SEPARATOR; + if ( !SequenceWriter.toFasta( "name", "awes", 5 ).toString().equals( ">name" + n + "awes" ) ) { return false; } - /////////////////////////// - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( s0.match( query_nodes ) ) { + if ( !SequenceWriter.toFasta( "name", "awes", 4 ).toString().equals( ">name" + n + "awes" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - if ( s0.match( query_nodes ) ) { + if ( !SequenceWriter.toFasta( "name", "awes", 3 ).toString().equals( ">name" + n + "awe" + n + "s" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); - if ( s0.match( query_nodes ) ) { + if ( !SequenceWriter.toFasta( "name", "awes", 2 ).toString().equals( ">name" + n + "aw" + n + "es" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - if ( s0.match( query_nodes ) ) { + if ( !SequenceWriter.toFasta( "name", "awes", 1 ).toString() + .equals( ">name" + n + "a" + n + "w" + n + "e" + n + "s" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - if ( s0.match( query_nodes ) ) { + if ( !SequenceWriter.toFasta( "name", "abcdefghij", 3 ).toString() + .equals( ">name" + n + "abc" + n + "def" + n + "ghi" + n + "j" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testSpecies() { + try { + final Species s1 = new BasicSpecies( "a" ); + final Species s2 = new BasicSpecies( "a" ); + final Species s3 = new BasicSpecies( "A" ); + final Species s4 = new BasicSpecies( "b" ); + if ( !s1.equals( s1 ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + if ( s1.getSpeciesId().equals( "x" ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( s0.match( query_nodes ) ) { + if ( s1.getSpeciesId().equals( null ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - if ( s0.match( query_nodes ) ) { + if ( !s1.equals( s2 ) ) { return false; } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - if ( s0.match( query_nodes ) ) { + if ( s1.equals( s3 ) ) { + return false; + } + if ( s1.hashCode() != s1.hashCode() ) { + return false; + } + if ( s1.hashCode() != s2.hashCode() ) { + return false; + } + if ( s1.hashCode() == s3.hashCode() ) { + return false; + } + if ( s1.compareTo( s1 ) != 0 ) { + return false; + } + if ( s1.compareTo( s2 ) != 0 ) { + return false; + } + if ( s1.compareTo( s3 ) != 0 ) { + return false; + } + if ( s1.compareTo( s4 ) >= 0 ) { + return false; + } + if ( s4.compareTo( s1 ) <= 0 ) { + return false; + } + if ( !s4.getSpeciesId().equals( "b" ) ) { + return false; + } + final Species s5 = new BasicSpecies( " C " ); + if ( !s5.getSpeciesId().equals( "C" ) ) { + return false; + } + if ( s5.equals( s1 ) ) { return false; } } catch ( final Exception e ) { - e.printStackTrace(); + e.printStackTrace( System.out ); return false; } return true; } - private static boolean testSplitStrict() { + private static boolean testSplit() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ]; + //Archaeopteryx.createApplication( p0 ); final Set ex = new HashSet(); ex.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); @@ -10619,7 +11561,11 @@ public final class Test { ex.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); ex.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); - final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, true, ex ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); + final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, false, ex ); + // System.out.println( s0.toString() ); + // Set query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); @@ -10671,14 +11617,12 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); @@ -10688,7 +11632,6 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); @@ -10696,51 +11639,196 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( !s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( s0.match( query_nodes ) ) { + return false; + } + query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - if ( !s0.match( query_nodes ) ) { - return false; - } - // - query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } + ///////// + // query_nodes = new HashSet(); + // query_nodes.add( new PhylogenyNode( "X" ) ); + // query_nodes.add( new PhylogenyNode( "Y" ) ); + // query_nodes.add( new PhylogenyNode( "A" ) ); + // query_nodes.add( new PhylogenyNode( "B" ) ); + // query_nodes.add( new PhylogenyNode( "C" ) ); + // query_nodes.add( new PhylogenyNode( "D" ) ); + // query_nodes.add( new PhylogenyNode( "E" ) ); + // query_nodes.add( new PhylogenyNode( "F" ) ); + // query_nodes.add( new PhylogenyNode( "G" ) ); + // if ( !s0.match( query_nodes ) ) { + // return false; + // } + // query_nodes = new HashSet(); + // query_nodes.add( new PhylogenyNode( "X" ) ); + // query_nodes.add( new PhylogenyNode( "Y" ) ); + // query_nodes.add( new PhylogenyNode( "A" ) ); + // query_nodes.add( new PhylogenyNode( "B" ) ); + // query_nodes.add( new PhylogenyNode( "C" ) ); + // if ( !s0.match( query_nodes ) ) { + // return false; + // } + // // + // query_nodes = new HashSet(); + // query_nodes.add( new PhylogenyNode( "X" ) ); + // query_nodes.add( new PhylogenyNode( "Y" ) ); + // query_nodes.add( new PhylogenyNode( "D" ) ); + // query_nodes.add( new PhylogenyNode( "E" ) ); + // query_nodes.add( new PhylogenyNode( "F" ) ); + // query_nodes.add( new PhylogenyNode( "G" ) ); + // if ( !s0.match( query_nodes ) ) { + // return false; + // } + // // + // query_nodes = new HashSet(); + // query_nodes.add( new PhylogenyNode( "X" ) ); + // query_nodes.add( new PhylogenyNode( "Y" ) ); + // query_nodes.add( new PhylogenyNode( "A" ) ); + // query_nodes.add( new PhylogenyNode( "B" ) ); + // query_nodes.add( new PhylogenyNode( "C" ) ); + // query_nodes.add( new PhylogenyNode( "D" ) ); + // if ( !s0.match( query_nodes ) ) { + // return false; + // } + // // + // query_nodes = new HashSet(); + // query_nodes.add( new PhylogenyNode( "X" ) ); + // query_nodes.add( new PhylogenyNode( "Y" ) ); + // query_nodes.add( new PhylogenyNode( "E" ) ); + // query_nodes.add( new PhylogenyNode( "F" ) ); + // query_nodes.add( new PhylogenyNode( "G" ) ); + // if ( !s0.match( query_nodes ) ) { + // return false; + // } + // // + // query_nodes = new HashSet(); + // query_nodes.add( new PhylogenyNode( "X" ) ); + // query_nodes.add( new PhylogenyNode( "Y" ) ); + // query_nodes.add( new PhylogenyNode( "F" ) ); + // query_nodes.add( new PhylogenyNode( "G" ) ); + // if ( !s0.match( query_nodes ) ) { + // return false; + // } // query_nodes = new HashSet(); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); - query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } + /////////////////////////// // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { @@ -10748,6 +11836,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { @@ -10755,6 +11845,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { @@ -10762,6 +11854,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); if ( s0.match( query_nodes ) ) { @@ -10769,6 +11863,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); if ( s0.match( query_nodes ) ) { @@ -10776,6 +11872,7 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { @@ -10783,6 +11880,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); @@ -10791,6 +11890,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); @@ -10799,6 +11900,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); @@ -10807,6 +11910,8 @@ public final class Test { } // query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "X" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "Y" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); @@ -10822,356 +11927,286 @@ public final class Test { return true; } - private static boolean testSubtreeDeletion() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t1 = factory.create( "((A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ]; - t1.deleteSubtree( t1.getNode( "A" ), false ); - if ( t1.getNumberOfExternalNodes() != 5 ) { - return false; - } - t1.toNewHampshireX(); - t1.deleteSubtree( t1.getNode( "E" ), false ); - if ( t1.getNumberOfExternalNodes() != 4 ) { - return false; - } - t1.toNewHampshireX(); - t1.deleteSubtree( t1.getNode( "F" ), false ); - if ( t1.getNumberOfExternalNodes() != 3 ) { - return false; - } - t1.toNewHampshireX(); - t1.deleteSubtree( t1.getNode( "D" ), false ); - t1.toNewHampshireX(); - if ( t1.getNumberOfExternalNodes() != 3 ) { - return false; - } - t1.deleteSubtree( t1.getNode( "def" ), false ); - t1.toNewHampshireX(); - if ( t1.getNumberOfExternalNodes() != 2 ) { - return false; - } - t1.deleteSubtree( t1.getNode( "B" ), false ); - t1.toNewHampshireX(); - if ( t1.getNumberOfExternalNodes() != 1 ) { - return false; - } - t1.deleteSubtree( t1.getNode( "C" ), false ); - t1.toNewHampshireX(); - if ( t1.getNumberOfExternalNodes() != 1 ) { - return false; - } - t1.deleteSubtree( t1.getNode( "abc" ), false ); - t1.toNewHampshireX(); - if ( t1.getNumberOfExternalNodes() != 1 ) { - return false; - } - t1.deleteSubtree( t1.getNode( "r" ), false ); - if ( t1.getNumberOfExternalNodes() != 0 ) { - return false; - } - if ( !t1.isEmpty() ) { - return false; - } - final Phylogeny t2 = factory.create( "(((1,2,3)A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ]; - t2.deleteSubtree( t2.getNode( "A" ), false ); - t2.toNewHampshireX(); - if ( t2.getNumberOfExternalNodes() != 5 ) { - return false; - } - t2.deleteSubtree( t2.getNode( "abc" ), false ); - t2.toNewHampshireX(); - if ( t2.getNumberOfExternalNodes() != 3 ) { - return false; - } - t2.deleteSubtree( t2.getNode( "def" ), false ); - t2.toNewHampshireX(); - if ( t2.getNumberOfExternalNodes() != 1 ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testSupportCount() { + private static boolean testSplitStrict() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t0_1 = factory.create( "(((A,B),C),(D,E))", new NHXParser() )[ 0 ]; - final Phylogeny[] phylogenies_1 = factory.create( "(((A,B),C),(D,E)) " + "(((C,B),A),(D,E))" - + "(((A,B),C),(D,E)) " + "(((A,B),C),(D,E))" - + "(((A,B),C),(D,E))" + "(((C,B),A),(D,E))" - + "(((E,B),D),(C,A))" + "(((C,B),A),(D,E))" - + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))", - new NHXParser() ); - SupportCount.count( t0_1, phylogenies_1, true, false ); - final Phylogeny t0_2 = factory.create( "(((((A,B),C),D),E),(F,G))", new NHXParser() )[ 0 ]; - final Phylogeny[] phylogenies_2 = factory.create( "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),((F,G),X))" - + "(((((A,Y),B),C),D),((F,G),E))" - + "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),(F,G),Z)" - + "(((((A,B),C),D),E),(F,G))" - + "((((((A,B),C),D),E),F),G)" - + "(((((X,Y),F,G),E),((A,B),C)),D)", - new NHXParser() ); - SupportCount.count( t0_2, phylogenies_2, true, false ); - final PhylogenyNodeIterator it = t0_2.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && ( PhylogenyMethods.getConfidenceValue( n ) != 10 ) ) { - return false; - } - } - final Phylogeny t0_3 = factory.create( "(((A,B)ab,C)abc,((D,E)de,F)def)", new NHXParser() )[ 0 ]; - final Phylogeny[] phylogenies_3 = factory.create( "(((A,B),C),((D,E),F))" + "(((A,C),B),((D,F),E))" - + "(((C,A),B),((F,D),E))" + "(((A,B),F),((D,E),C))" + "(((((A,B),C),D),E),F)", new NHXParser() ); - SupportCount.count( t0_3, phylogenies_3, true, false ); - t0_3.reRoot( t0_3.getNode( "def" ).getId() ); - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "ab" ) ) != 3 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "abc" ) ) != 4 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "def" ) ) != 4 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "de" ) ) != 2 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "A" ) ) != 5 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "B" ) ) != 5 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "C" ) ) != 5 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "D" ) ) != 5 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "E" ) ) != 5 ) { - return false; - } - if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "F" ) ) != 5 ) { - return false; - } - final Phylogeny t0_4 = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - final Phylogeny[] phylogenies_4 = factory.create( "((((((A,X),C),B),D),E),F) " - + "(((A,B,Z),C,Q),(((D,Y),E),F))", new NHXParser() ); - SupportCount.count( t0_4, phylogenies_4, true, false ); - t0_4.reRoot( t0_4.getNode( "F" ).getId() ); - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "1" ) ) != 1 ) { + final Phylogeny p0 = factory.create( "(((A,B,C),D),(E,(F,G)))R", new NHXParser() )[ 0 ]; + final Set ex = new HashSet(); + ex.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + ex.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + final TreeSplitMatrix s0 = new TreeSplitMatrix( p0, true, ex ); + Set query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "2" ) ) != 2 ) { + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "3" ) ) != 1 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "4" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "A" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "B" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "C" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "D" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "E" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "F" ) ) != 2 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( !s0.match( query_nodes ) ) { return false; } - Phylogeny a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - final Phylogeny b1 = factory.create( "(((((B,A)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - double d = SupportCount.compare( b1, a, true, true, true ); - if ( !Test.isEqual( d, 5.0 / 5.0 ) ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + if ( s0.match( query_nodes ) ) { return false; } - a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - final Phylogeny b2 = factory.create( "(((((C,B)1,A)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - d = SupportCount.compare( b2, a, true, true, true ); - if ( !Test.isEqual( d, 4.0 / 5.0 ) ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( s0.match( query_nodes ) ) { return false; } - a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - final Phylogeny b3 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)", new NHXParser() )[ 0 ]; - d = SupportCount.compare( b3, a, true, true, true ); - if ( !Test.isEqual( d, 2.0 / 5.0 ) ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( s0.match( query_nodes ) ) { return false; } - a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)r", new NHXParser() )[ 0 ]; - final Phylogeny b4 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)r", new NHXParser() )[ 0 ]; - d = SupportCount.compare( b4, a, true, true, false ); - if ( !Test.isEqual( d, 1.0 / 5.0 ) ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( s0.match( query_nodes ) ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testSupportTransfer() { - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p1 = factory.create( "(((A,B)ab:97,C)abc:57,((D,E)de:10,(F,G)fg:50,(H,I)hi:64)defghi)", - new NHXParser() )[ 0 ]; - final Phylogeny p2 = factory - .create( "(((A:0.1,B:0.3)ab:0.4,C)abc:0.5,((D,E)de,(F,G)fg,(H,I)hi:0.59)defghi)", new NHXParser() )[ 0 ]; - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) >= 0.0 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) >= 0.0 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + if ( s0.match( query_nodes ) ) { return false; } - support_transfer.moveBranchLengthsToBootstrap( p1 ); - support_transfer.transferSupportValues( p1, p2 ); - if ( p2.getNode( "ab" ).getDistanceToParent() != 0.4 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( p2.getNode( "abc" ).getDistanceToParent() != 0.5 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( p2.getNode( "hi" ).getDistanceToParent() != 0.59 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) != 97 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) != 57 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "de" ) ) != 10 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "fg" ) ) != 50 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + if ( s0.match( query_nodes ) ) { return false; } - if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "hi" ) ) != 64 ) { + // + query_nodes = new HashSet(); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); + query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); + if ( s0.match( query_nodes ) ) { return false; } } catch ( final Exception e ) { - e.printStackTrace( System.out ); + e.printStackTrace(); return false; } return true; } - private static boolean testTaxonomyExtraction() { + private static boolean testSubtreeDeletion() { try { - final PhylogenyNode n0 = PhylogenyNode - .createInstanceFromNhxString( "sd_12345678", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n0.getNodeData().isHasTaxonomy() ) { - return false; - } - final PhylogenyNode n1 = PhylogenyNode - .createInstanceFromNhxString( "sd_12345x", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n1.getNodeData().isHasTaxonomy() ) { - System.out.println( n1.toString() ); - return false; - } - final PhylogenyNode n2x = PhylogenyNode - .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n2x.getNodeData().isHasTaxonomy() ) { - return false; - } - final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { - System.out.println( n3.toString() ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t1 = factory.create( "((A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ]; + t1.deleteSubtree( t1.getNode( "A" ), false ); + if ( t1.getNumberOfExternalNodes() != 5 ) { return false; } - final PhylogenyNode n4 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n4.getNodeData().isHasTaxonomy() ) { - System.out.println( n4.toString() ); + t1.toNewHampshireX(); + t1.deleteSubtree( t1.getNode( "E" ), false ); + if ( t1.getNumberOfExternalNodes() != 4 ) { return false; } - final PhylogenyNode n5 = PhylogenyNode - .createInstanceFromNhxString( "12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n5.getNodeData().isHasTaxonomy() ) { - System.out.println( n5.toString() ); + t1.toNewHampshireX(); + t1.deleteSubtree( t1.getNode( "F" ), false ); + if ( t1.getNumberOfExternalNodes() != 3 ) { return false; } - final PhylogenyNode n6 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n6.getNodeData().isHasTaxonomy() ) { - System.out.println( n6.toString() ); + t1.toNewHampshireX(); + t1.deleteSubtree( t1.getNode( "D" ), false ); + t1.toNewHampshireX(); + if ( t1.getNumberOfExternalNodes() != 3 ) { return false; } - final PhylogenyNode n7 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n7.getNodeData().isHasTaxonomy() ) { - System.out.println( n7.toString() ); + t1.deleteSubtree( t1.getNode( "def" ), false ); + t1.toNewHampshireX(); + if ( t1.getNumberOfExternalNodes() != 2 ) { return false; } - final PhylogenyNode n8 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { - System.out.println( n8.toString() ); + t1.deleteSubtree( t1.getNode( "B" ), false ); + t1.toNewHampshireX(); + if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } - final PhylogenyNode n9 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { - System.out.println( n9.toString() ); + t1.deleteSubtree( t1.getNode( "C" ), false ); + t1.toNewHampshireX(); + if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } - final PhylogenyNode n10x = PhylogenyNode - .createInstanceFromNhxString( "blag_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n10x.getNodeData().isHasTaxonomy() ) { - System.out.println( n10x.toString() ); + t1.deleteSubtree( t1.getNode( "abc" ), false ); + t1.toNewHampshireX(); + if ( t1.getNumberOfExternalNodes() != 1 ) { return false; } - final PhylogenyNode n10xx = PhylogenyNode - .createInstanceFromNhxString( "blag_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( n10xx.getNodeData().isHasTaxonomy() ) { - System.out.println( n10xx.toString() ); + t1.deleteSubtree( t1.getNode( "r" ), false ); + if ( t1.getNumberOfExternalNodes() != 0 ) { return false; } - final PhylogenyNode n10 = PhylogenyNode - .createInstanceFromNhxString( "blag_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { - System.out.println( n10.toString() ); + if ( !t1.isEmpty() ) { return false; } - final PhylogenyNode n11 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); - if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { - System.out.println( n11.toString() ); + final Phylogeny t2 = factory.create( "(((1,2,3)A,B,C)abc,(D,E,F)def)r", new NHXParser() )[ 0 ]; + t2.deleteSubtree( t2.getNode( "A" ), false ); + t2.toNewHampshireX(); + if ( t2.getNumberOfExternalNodes() != 5 ) { return false; } - final PhylogenyNode n12 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus_musculus", - NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); - if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { - System.out.println( n12.toString() ); + t2.deleteSubtree( t2.getNode( "abc" ), false ); + t2.toNewHampshireX(); + if ( t2.getNumberOfExternalNodes() != 3 ) { return false; } - final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); - if ( n13.getNodeData().isHasTaxonomy() ) { - System.out.println( n13.toString() ); + t2.deleteSubtree( t2.getNode( "def" ), false ); + t2.toNewHampshireX(); + if ( t2.getNumberOfExternalNodes() != 1 ) { return false; } } @@ -11182,396 +12217,433 @@ public final class Test { return true; } - private static boolean testTreeMethods() { + private static boolean testSupportCount() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)", new NHXParser() )[ 0 ]; - PhylogenyMethods.collapseSubtreeStructure( t0.getNode( "abcd" ) ); - if ( !t0.toNewHampshireX().equals( "((A,B,C,D)abcd,E)" ) ) { - System.out.println( t0.toNewHampshireX() ); + final Phylogeny t0_1 = factory.create( "(((A,B),C),(D,E))", new NHXParser() )[ 0 ]; + final Phylogeny[] phylogenies_1 = factory.create( "(((A,B),C),(D,E)) " + "(((C,B),A),(D,E))" + + "(((A,B),C),(D,E)) " + "(((A,B),C),(D,E))" + + "(((A,B),C),(D,E))" + "(((C,B),A),(D,E))" + + "(((E,B),D),(C,A))" + "(((C,B),A),(D,E))" + + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))", + new NHXParser() ); + SupportCount.count( t0_1, phylogenies_1, true, false ); + final Phylogeny t0_2 = factory.create( "(((((A,B),C),D),E),(F,G))", new NHXParser() )[ 0 ]; + final Phylogeny[] phylogenies_2 = factory.create( "(((((A,B),C),D),E),(F,G))" + + "(((((A,B),C),D),E),((F,G),X))" + + "(((((A,Y),B),C),D),((F,G),E))" + + "(((((A,B),C),D),E),(F,G))" + + "(((((A,B),C),D),E),(F,G))" + + "(((((A,B),C),D),E),(F,G))" + + "(((((A,B),C),D),E),(F,G),Z)" + + "(((((A,B),C),D),E),(F,G))" + + "((((((A,B),C),D),E),F),G)" + + "(((((X,Y),F,G),E),((A,B),C)),D)", + new NHXParser() ); + SupportCount.count( t0_2, phylogenies_2, true, false ); + final PhylogenyNodeIterator it = t0_2.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && ( PhylogenyMethods.getConfidenceValue( n ) != 10 ) ) { + return false; + } + } + final Phylogeny t0_3 = factory.create( "(((A,B)ab,C)abc,((D,E)de,F)def)", new NHXParser() )[ 0 ]; + final Phylogeny[] phylogenies_3 = factory.create( "(((A,B),C),((D,E),F))" + "(((A,C),B),((D,F),E))" + + "(((C,A),B),((F,D),E))" + "(((A,B),F),((D,E),C))" + "(((((A,B),C),D),E),F)", new NHXParser() ); + SupportCount.count( t0_3, phylogenies_3, true, false ); + t0_3.reRoot( t0_3.getNode( "def" ).getId() ); + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "ab" ) ) != 3 ) { return false; } - final Phylogeny t1 = factory.create( "((((A:0.1,B)ab:0.2,C)abc:0.3,D)abcd:0.4,E)", new NHXParser() )[ 0 ]; - PhylogenyMethods.collapseSubtreeStructure( t1.getNode( "abcd" ) ); - if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 0.6 ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "abc" ) ) != 4 ) { return false; } - if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 0.5 ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "def" ) ) != 4 ) { return false; } - if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 0.3 ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "de" ) ) != 2 ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testSequenceDbWsTools1() { - try { - final PhylogenyNode n = new PhylogenyNode(); - n.setName( "NP_001025424" ); - Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) - || !acc.getValue().equals( "NP_001025424" ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "A" ) ) != 5 ) { return false; } - n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) - || !acc.getValue().equals( "NP_001025424" ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "B" ) ) != 5 ) { return false; } - n.setName( "NP_001025424.1" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) - || !acc.getValue().equals( "NP_001025424" ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "C" ) ) != 5 ) { return false; } - n.setName( "NM_001030253" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) - || !acc.getValue().equals( "NM_001030253" ) ) { + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "D" ) ) != 5 ) { return false; } - n.setName( "BCL2_HUMAN" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) - || !acc.getValue().equals( "BCL2_HUMAN" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "E" ) ) != 5 ) { return false; } - n.setName( "P10415" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) - || !acc.getValue().equals( "P10415" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_3.getNode( "F" ) ) != 5 ) { return false; } - n.setName( " P10415 " ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) - || !acc.getValue().equals( "P10415" ) ) { - System.out.println( acc.toString() ); + final Phylogeny t0_4 = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; + final Phylogeny[] phylogenies_4 = factory.create( "((((((A,X),C),B),D),E),F) " + + "(((A,B,Z),C,Q),(((D,Y),E),F))", new NHXParser() ); + SupportCount.count( t0_4, phylogenies_4, true, false ); + t0_4.reRoot( t0_4.getNode( "F" ).getId() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "1" ) ) != 1 ) { return false; } - n.setName( "_P10415|" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) - || !acc.getValue().equals( "P10415" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "2" ) ) != 2 ) { return false; } - n.setName( "AY695820" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AY695820" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "3" ) ) != 1 ) { return false; } - n.setName( "_AY695820_" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AY695820" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "4" ) ) != 2 ) { return false; } - n.setName( "AAA59452" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AAA59452" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "A" ) ) != 2 ) { return false; } - n.setName( "_AAA59452_" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AAA59452" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "B" ) ) != 2 ) { return false; } - n.setName( "AAA59452.1" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AAA59452.1" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "C" ) ) != 2 ) { return false; } - n.setName( "_AAA59452.1_" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AAA59452.1" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "D" ) ) != 2 ) { return false; } - n.setName( "GI:94894583" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) - || !acc.getValue().equals( "94894583" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "E" ) ) != 2 ) { return false; } - n.setName( "gi|71845847|1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) - || !acc.getValue().equals( "71845847" ) ) { - System.out.println( acc.toString() ); + if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "F" ) ) != 2 ) { return false; } - n.setName( "gi|71845847|gb|AAZ45343.1| 1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); - acc = SequenceDbWsTools.obtainSeqAccession( n ); - if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) - || !acc.getValue().equals( "AAZ45343.1" ) ) { - System.out.println( acc.toString() ); + Phylogeny a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; + final Phylogeny b1 = factory.create( "(((((B,A)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; + double d = SupportCount.compare( b1, a, true, true, true ); + if ( !Test.isEqual( d, 5.0 / 5.0 ) ) { + return false; + } + a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; + final Phylogeny b2 = factory.create( "(((((C,B)1,A)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; + d = SupportCount.compare( b2, a, true, true, true ); + if ( !Test.isEqual( d, 4.0 / 5.0 ) ) { + return false; + } + a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; + final Phylogeny b3 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)", new NHXParser() )[ 0 ]; + d = SupportCount.compare( b3, a, true, true, true ); + if ( !Test.isEqual( d, 2.0 / 5.0 ) ) { + return false; + } + a = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)r", new NHXParser() )[ 0 ]; + final Phylogeny b4 = factory.create( "(((((F,C)1,A)2,B)3,D)4,E)r", new NHXParser() )[ 0 ]; + d = SupportCount.compare( b4, a, true, true, false ); + if ( !Test.isEqual( d, 1.0 / 5.0 ) ) { return false; } } catch ( final Exception e ) { + e.printStackTrace( System.out ); return false; } return true; } - private static boolean testSequenceDbWsTools2() { + private static boolean testSupportTransfer() { try { - final PhylogenyNode n1 = new PhylogenyNode( "NP_001025424" ); - SequenceDbWsTools.obtainSeqInformation( n1 ); - if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { - return false; - } - if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { - return false; - } - if ( !n1.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny p1 = factory.create( "(((A,B)ab:97,C)abc:57,((D,E)de:10,(F,G)fg:50,(H,I)hi:64)defghi)", + new NHXParser() )[ 0 ]; + final Phylogeny p2 = factory + .create( "(((A:0.1,B:0.3)ab:0.4,C)abc:0.5,((D,E)de,(F,G)fg,(H,I)hi:0.59)defghi)", new NHXParser() )[ 0 ]; + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) >= 0.0 ) { return false; } - if ( !n1.getNodeData().getSequence().getAccession().getValue().equals( "NP_001025424" ) ) { + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) >= 0.0 ) { return false; } - final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" ); - SequenceDbWsTools.obtainSeqInformation( n2 ); - if ( !n2.getNodeData().getSequence().getName() - .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) { + support_transfer.moveBranchLengthsToBootstrap( p1 ); + support_transfer.transferSupportValues( p1, p2 ); + if ( p2.getNode( "ab" ).getDistanceToParent() != 0.4 ) { return false; } - if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + if ( p2.getNode( "abc" ).getDistanceToParent() != 0.5 ) { return false; } - if ( !n2.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + if ( p2.getNode( "hi" ).getDistanceToParent() != 0.59 ) { return false; } - if ( !n2.getNodeData().getSequence().getAccession().getValue().equals( "NM_001030253" ) ) { + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) != 97 ) { return false; } - final PhylogenyNode n3 = new PhylogenyNode( "NM_184234.2" ); - SequenceDbWsTools.obtainSeqInformation( n3 ); - if ( !n3.getNodeData().getSequence().getName() - .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "abc" ) ) != 57 ) { return false; } - if ( !n3.getNodeData().getTaxonomy().getScientificName().equals( "Homo sapiens" ) ) { + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "de" ) ) != 10 ) { return false; } - if ( !n3.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "fg" ) ) != 50 ) { return false; } - if ( !n3.getNodeData().getSequence().getAccession().getValue().equals( "NM_184234" ) ) { + if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "hi" ) ) != 64 ) { return false; } } - catch ( final IOException e ) { - System.out.println(); - System.out.println( "the following might be due to absence internet connection:" ); - e.printStackTrace( System.out ); - return true; - } catch ( final Exception e ) { - e.printStackTrace(); + e.printStackTrace( System.out ); return false; } return true; } - private static boolean testEbiEntryRetrieval() { + private static boolean testTaxonomyExtraction() { try { - final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEntry( "AAK41263" ); - if ( !entry.getAccession().equals( "AAK41263" ) ) { - System.out.println( entry.getAccession() ); + final PhylogenyNode n0 = PhylogenyNode + .createInstanceFromNhxString( "sd_12345678", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n0.getNodeData().isHasTaxonomy() ) { return false; } - if ( !entry.getTaxonomyScientificName().equals( "Sulfolobus solfataricus P2" ) ) { - System.out.println( entry.getTaxonomyScientificName() ); + final PhylogenyNode n1 = PhylogenyNode + .createInstanceFromNhxString( "sd_12345x", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n1.getNodeData().isHasTaxonomy() ) { + System.out.println( n1.toString() ); return false; } - if ( !entry.getSequenceName() - .equals( "Sulfolobus solfataricus P2 Glycogen debranching enzyme, hypothetical (treX-like)" ) ) { - System.out.println( entry.getSequenceName() ); + final PhylogenyNode n2x = PhylogenyNode + .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n2x.getNodeData().isHasTaxonomy() ) { return false; } - // if ( !entry.getSequenceSymbol().equals( "" ) ) { - // System.out.println( entry.getSequenceSymbol() ); - // return false; - // } - if ( !entry.getGeneName().equals( "treX-like" ) ) { - System.out.println( entry.getGeneName() ); + final PhylogenyNode n3 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { + System.out.println( n3.toString() ); return false; } - if ( !entry.getTaxonomyIdentifier().equals( "273057" ) ) { - System.out.println( entry.getTaxonomyIdentifier() ); + final PhylogenyNode n4 = PhylogenyNode + .createInstanceFromNhxString( "blag-12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n4.getNodeData().isHasTaxonomy() ) { + System.out.println( n4.toString() ); return false; } - if ( !entry.getAnnotations().first().getRefValue().equals( "3.2.1.33" ) ) { - System.out.println( entry.getAnnotations().first().getRefValue() ); + final PhylogenyNode n5 = PhylogenyNode + .createInstanceFromNhxString( "12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n5.getNodeData().isHasTaxonomy() ) { + System.out.println( n5.toString() ); return false; } - if ( !entry.getAnnotations().first().getRefSource().equals( "EC" ) ) { - System.out.println( entry.getAnnotations().first().getRefSource() ); + final PhylogenyNode n6 = PhylogenyNode + .createInstanceFromNhxString( "BLAG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n6.getNodeData().isHasTaxonomy() ) { + System.out.println( n6.toString() ); return false; } - if ( entry.getCrossReferences().size() != 5 ) { + final PhylogenyNode n7 = PhylogenyNode + .createInstanceFromNhxString( "BLAG-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n7.getNodeData().isHasTaxonomy() ) { + System.out.println( n7.toString() ); return false; } - // - final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" ); - if ( !entry1.getAccession().equals( "ABJ16409" ) ) { + final PhylogenyNode n8 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { + System.out.println( n8.toString() ); return false; } - if ( !entry1.getTaxonomyScientificName().equals( "Felis catus" ) ) { - System.out.println( entry1.getTaxonomyScientificName() ); + final PhylogenyNode n9 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { + System.out.println( n9.toString() ); return false; } - if ( !entry1.getSequenceName().equals( "Felis catus (domestic cat) partial BCL2" ) ) { - System.out.println( entry1.getSequenceName() ); + final PhylogenyNode n10x = PhylogenyNode + .createInstanceFromNhxString( "BLAG_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n10x.getNodeData().isHasTaxonomy() ) { + System.out.println( n10x.toString() ); return false; } - if ( !entry1.getTaxonomyIdentifier().equals( "9685" ) ) { - System.out.println( entry1.getTaxonomyIdentifier() ); + final PhylogenyNode n10xx = PhylogenyNode + .createInstanceFromNhxString( "BLAG_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n10xx.getNodeData().isHasTaxonomy() ) { + System.out.println( n10xx.toString() ); return false; } - if ( !entry1.getGeneName().equals( "BCL2" ) ) { - System.out.println( entry1.getGeneName() ); + final PhylogenyNode n10 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { + System.out.println( n10.toString() ); return false; } - if ( entry1.getCrossReferences().size() != 6 ) { + final PhylogenyNode n11 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n11.toString() ); return false; } - // - final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" ); - if ( !entry2.getAccession().equals( "NM_184234" ) ) { + final PhylogenyNode n12 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_Mus_musculus_musculus", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n12.toString() ); return false; } - if ( !entry2.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { - System.out.println( entry2.getTaxonomyScientificName() ); + final PhylogenyNode n13 = PhylogenyNode + .createInstanceFromNhxString( "BLAG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( n13.getNodeData().isHasTaxonomy() ) { + System.out.println( n13.toString() ); return false; } - if ( !entry2.getSequenceName() - .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { - System.out.println( entry2.getSequenceName() ); + final PhylogenyNode n14 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n14.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n14.toString() ); return false; } - if ( !entry2.getTaxonomyIdentifier().equals( "9606" ) ) { - System.out.println( entry2.getTaxonomyIdentifier() ); + final PhylogenyNode n15 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n15.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n15.toString() ); return false; } - if ( !entry2.getGeneName().equals( "RBM39" ) ) { - System.out.println( entry2.getGeneName() ); + final PhylogenyNode n16 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n16.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n16.toString() ); return false; } - if ( entry2.getCrossReferences().size() != 3 ) { + final PhylogenyNode n17 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n17.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n17.toString() ); return false; } - // - final SequenceDatabaseEntry entry3 = SequenceDbWsTools.obtainEntry( "HM043801" ); - if ( !entry3.getAccession().equals( "HM043801" ) ) { + final PhylogenyNode n18 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n18.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n18.toString() ); return false; } - if ( !entry3.getTaxonomyScientificName().equals( "Bursaphelenchus xylophilus" ) ) { - System.out.println( entry3.getTaxonomyScientificName() ); + final PhylogenyNode n19 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_musculus_K392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n19.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n19.toString() ); return false; } - if ( !entry3.getSequenceName().equals( "Bursaphelenchus xylophilus RAF gene, complete cds" ) ) { - System.out.println( entry3.getSequenceName() ); + final PhylogenyNode n20 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n20.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n20.toString() ); return false; } - if ( !entry3.getTaxonomyIdentifier().equals( "6326" ) ) { - System.out.println( entry3.getTaxonomyIdentifier() ); + final PhylogenyNode n21 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus musculus K392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n21.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n21.toString() ); return false; } - if ( !entry3.getSequenceSymbol().equals( "RAF" ) ) { - System.out.println( entry3.getSequenceSymbol() ); + final PhylogenyNode n23 = PhylogenyNode + .createInstanceFromNhxString( "9EMVE_Nematostella_vectensis", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n23.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { + System.out.println( n23.toString() ); return false; } - if ( !ForesterUtil.isEmpty( entry3.getGeneName() ) ) { + final PhylogenyNode n24 = PhylogenyNode + .createInstanceFromNhxString( "9EMVE_Nematostella", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n24.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { + System.out.println( n24.toString() ); return false; } - if ( entry3.getCrossReferences().size() != 8 ) { + // + final PhylogenyNode n25 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_vectensis_NEMVE", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n25.getNodeData().getTaxonomy().getTaxonomyCode().equals( "NEMVE" ) ) { + System.out.println( n25.toString() ); return false; } - // - // - final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" ); - if ( !entry4.getAccession().equals( "AAA36557" ) ) { + final PhylogenyNode n26 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_vectensis_9EMVE", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n26.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { + System.out.println( n26.toString() ); return false; } - if ( !entry4.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { - System.out.println( entry4.getTaxonomyScientificName() ); + final PhylogenyNode n27 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_9EMVE", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n27.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { + System.out.println( n27.toString() ); return false; } - if ( !entry4.getSequenceName().equals( "Homo sapiens (human) ras protein" ) ) { - System.out.println( entry4.getSequenceName() ); + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testTreeCopy() { + try { + final String str_0 = "((((a,b),c),d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=animals]"; + final Phylogeny t0 = Phylogeny.createInstanceFromNhxString( str_0 ); + final Phylogeny t1 = t0.copy(); + if ( !t1.toNewHampshireX().equals( t0.toNewHampshireX() ) ) { return false; } - if ( !entry4.getTaxonomyIdentifier().equals( "9606" ) ) { - System.out.println( entry4.getTaxonomyIdentifier() ); + if ( !t1.toNewHampshireX().equals( str_0 ) ) { return false; } - if ( !entry4.getGeneName().equals( "ras" ) ) { - System.out.println( entry4.getGeneName() ); + t0.deleteSubtree( t0.getNode( "c" ), true ); + t0.deleteSubtree( t0.getNode( "a" ), true ); + t0.getRoot().getNodeData().getTaxonomy().setScientificName( "metazoa" ); + t0.getNode( "b" ).setName( "Bee" ); + if ( !t0.toNewHampshireX().equals( "((Bee,d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=metazoa]" ) ) { return false; } - // if ( !entry4.getChromosome().equals( "ras" ) ) { - // System.out.println( entry4.getChromosome() ); - // return false; - // } - // if ( !entry4.getMap().equals( "ras" ) ) { - // System.out.println( entry4.getMap() ); - // return false; - // } - //TODO FIXME gi... - // - //TODO fails: - // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); - // if ( !entry5.getAccession().equals( "HM043801" ) ) { - // return false; - // } - final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "AAZ45343.1" ); - if ( !entry5.getAccession().equals( "AAZ45343" ) ) { + if ( !t1.toNewHampshireX().equals( str_0 ) ) { return false; } - if ( !entry5.getTaxonomyScientificName().equals( "Dechloromonas aromatica RCB" ) ) { - System.out.println( entry5.getTaxonomyScientificName() ); + t0.deleteSubtree( t0.getNode( "e" ), true ); + t0.deleteSubtree( t0.getNode( "Bee" ), true ); + t0.deleteSubtree( t0.getNode( "d" ), true ); + if ( !t1.toNewHampshireX().equals( str_0 ) ) { return false; } - if ( !entry5.getSequenceName().equals( "Dechloromonas aromatica RCB 1,4-alpha-glucan branching enzyme" ) ) { - System.out.println( entry5.getSequenceName() ); + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testTreeMethods() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)", new NHXParser() )[ 0 ]; + PhylogenyMethods.collapseSubtreeStructure( t0.getNode( "abcd" ) ); + if ( !t0.toNewHampshireX().equals( "((A,B,C,D)abcd,E)" ) ) { + System.out.println( t0.toNewHampshireX() ); return false; } - if ( !entry5.getTaxonomyIdentifier().equals( "159087" ) ) { - System.out.println( entry5.getTaxonomyIdentifier() ); + final Phylogeny t1 = factory.create( "((((A:0.1,B)ab:0.2,C)abc:0.3,D)abcd:0.4,E)", new NHXParser() )[ 0 ]; + PhylogenyMethods.collapseSubtreeStructure( t1.getNode( "abcd" ) ); + if ( !isEqual( t1.getNode( "A" ).getDistanceToParent(), 0.6 ) ) { + return false; + } + if ( !isEqual( t1.getNode( "B" ).getDistanceToParent(), 0.5 ) ) { + return false; + } + if ( !isEqual( t1.getNode( "C" ).getDistanceToParent(), 0.3 ) ) { return false; } - } - catch ( final IOException e ) { - System.out.println(); - System.out.println( "the following might be due to absence internet connection:" ); - e.printStackTrace( System.out ); - return true; } catch ( final Exception e ) { - e.printStackTrace(); + e.printStackTrace( System.out ); return false; } return true; @@ -11598,6 +12670,14 @@ public final class Test { if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; } + if ( !entry + .getMolecularSequence() + .getMolecularSequenceAsString() + .startsWith( "MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKMNLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV" ) + || !entry.getMolecularSequence().getMolecularSequenceAsString().endsWith( "LAHAIHQVTK" ) ) { + System.out.println( entry.getMolecularSequence().getMolecularSequenceAsString() ); + return false; + } } catch ( final IOException e ) { System.out.println();