X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftest%2FTest.java;h=1a8a2165195b3d32e9df0712b7147f55e87ef1b3;hb=8298cc0323b6f4d4a905f31512f0b3adcc76f925;hp=d1b94ed6a0138e20c02b7be133853225351c79f9;hpb=7e2a839d55608212fed645ce9ffe3a3f4952fb17;p=jalview.git diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index d1b94ed..1a8a216 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -2,8 +2,8 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// Copyright (C) 2014 Christian M. Zmasek +// Copyright (C) 2014 Sanford-Burnham Medical Research Institute // All rights reserved // // This library is free software; you can redistribute it and/or @@ -20,7 +20,6 @@ // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // -// Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.test; @@ -29,6 +28,9 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.StringWriter; +import java.io.Writer; +import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; @@ -39,7 +41,10 @@ import java.util.Set; import java.util.SortedSet; import org.forester.application.support_transfer; +import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.TreePanelUtil; +import org.forester.archaeopteryx.webservices.WebserviceUtil; +import org.forester.clade_analysis.CladeAnalysisTest; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; @@ -60,8 +65,10 @@ import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.io.writers.SequenceWriter; import org.forester.msa.BasicMsa; +import org.forester.msa.DeleteableMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; +import org.forester.msa.Msa.MSA_FORMAT; import org.forester.msa.MsaInferrer; import org.forester.msa.MsaMethods; import org.forester.pccx.TestPccx; @@ -71,6 +78,7 @@ import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; @@ -81,7 +89,7 @@ import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyData; import org.forester.phylogeny.data.PhylogenyDataUtil; import org.forester.phylogeny.data.Polygon; -import org.forester.phylogeny.data.PropertiesMap; +import org.forester.phylogeny.data.PropertiesList; import org.forester.phylogeny.data.Property; import org.forester.phylogeny.data.Property.AppliesTo; import org.forester.phylogeny.data.ProteinDomain; @@ -99,7 +107,7 @@ import org.forester.sdi.SDI; import org.forester.sdi.SDIR; import org.forester.sdi.TestGSDI; import org.forester.sequence.BasicSequence; -import org.forester.sequence.Sequence; +import org.forester.sequence.MolecularSequence; import org.forester.species.BasicSpecies; import org.forester.species.Species; import org.forester.surfacing.TestSurfacing; @@ -114,34 +122,28 @@ import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.GeneralTable; -import org.forester.util.SequenceIdParser; +import org.forester.util.SequenceAccessionTools; import org.forester.ws.seqdb.SequenceDatabaseEntry; import org.forester.ws.seqdb.SequenceDbWsTools; import org.forester.ws.seqdb.UniProtTaxonomy; -import org.forester.ws.wabi.TxSearch; -import org.forester.ws.wabi.TxSearch.RANKS; -import org.forester.ws.wabi.TxSearch.TAX_NAME_CLASS; -import org.forester.ws.wabi.TxSearch.TAX_RANK; @SuppressWarnings( "unused") public final class Test { - private final static double ZERO_DIFF = 1.0E-9; - private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) - + ForesterUtil.getFileSeparator() + "test_data" - + ForesterUtil.getFileSeparator(); private final static String PATH_TO_RESOURCES = System.getProperty( "user.dir" ) - + ForesterUtil.getFileSeparator() + "resources" - + ForesterUtil.getFileSeparator(); - private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; - private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" - + ForesterConstants.PHYLO_XML_VERSION + "/" - + ForesterConstants.PHYLO_XML_XSD; + + ForesterUtil.getFileSeparator() + "resources" + ForesterUtil.getFileSeparator(); + private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); + private final static boolean PERFORM_DB_TESTS = false; + private static final boolean PERFORM_WEB_TREE_ACCESS = false; private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" - + ForesterConstants.PHYLO_XML_VERSION + "/" - + ForesterConstants.PHYLO_XML_XSD; + + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; + private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" + + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; + private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; + private final static double ZERO_DIFF = 1.0E-9; - public static boolean isEqual( final double a, final double b ) { + private static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF ); } @@ -172,6 +174,15 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); + System.out.print( "Basic node methods: " ); + if ( Test.testBasicNodeMethods() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Protein id: " ); if ( !testProteinId() ) { System.out.println( "failed." ); @@ -208,6 +219,28 @@ public final class Test { succeeded++; } System.out.println( "OK." ); + + System.out.print( "Common prefix: " ); + if ( !testCommonPrefix() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + + System.out.print( "Common prefix sep: " ); + if ( !testCommonPrefixSep() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + + System.out.print( "Sequence writer: " ); if ( testSequenceWriter() ) { System.out.println( "OK." ); @@ -226,6 +259,24 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "UniProtKB id extraction: " ); + if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Sequence DB tools 1: " ); + if ( testSequenceDbWsTools1() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Hmmscan output parser: " ); if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); @@ -235,8 +286,26 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Basic node methods: " ); - if ( Test.testBasicNodeMethods() ) { + System.out.print( "Overlap removal: " ); + if ( !org.forester.test.Test.testOverlapRemoval() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + System.out.print( "Engulfing overlap removal: " ); + if ( !Test.testEngulfingOverlapRemoval() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + System.out.print( "Taxonomy data extraction: " ); + if ( Test.testExtractTaxonomyDataFromNodeName() ) { System.out.println( "OK." ); succeeded++; } @@ -271,8 +340,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "UniProtKB id extraction: " ); - if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { + System.out.print( "Uri for Aptx web sequence accession: " ); + if ( Test.testCreateUriForSeqWeb() ) { System.out.println( "OK." ); succeeded++; } @@ -280,8 +349,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Uri for Aptx web sequence accession: " ); - if ( Test.testCreateUriForSeqWeb() ) { + System.out.print( "Basic node construction and parsing of NHX (node level): " ); + if ( Test.testNHXNodeParsing() ) { System.out.println( "OK." ); succeeded++; } @@ -289,8 +358,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Basic node construction and parsing of NHX (node level): " ); - if ( Test.testNHXNodeParsing() ) { + System.out.print( "Node construction and parsing of NHX (node level): " ); + if ( Test.testNHXNodeParsing2() ) { System.out.println( "OK." ); succeeded++; } @@ -316,6 +385,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "NH parsing - special chars: " ); + if ( Test.testNHParsingSpecialChars() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Conversion to NHX (node level): " ); if ( Test.testNHXconversion() ) { System.out.println( "OK." ); @@ -415,6 +493,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "phyloXML parsing (validating against schema): " ); + if ( testPhyloXMLparsingValidating() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Roundtrip phyloXML parsing (validating against schema): " ); if ( Test.testBasicPhyloXMLparsingRoundtrip() ) { System.out.println( "OK." ); @@ -442,6 +529,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "UTF-8 parsing from file: " ); + if ( Test.testUTF8ParsingFromFile() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Copying of node data: " ); if ( Test.testCopyOfNodeData() ) { System.out.println( "OK." ); @@ -451,6 +547,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Tree copy: " ); + if ( Test.testTreeCopy() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Basic tree methods: " ); if ( Test.testBasicTreeMethods() ) { System.out.println( "OK." ); @@ -469,6 +574,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Phylogeny methods:" ); + if ( Test.testPhylogenyMethods() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Postorder Iterator: " ); if ( Test.testPostOrderIterator() ) { System.out.println( "OK." ); @@ -685,6 +799,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Clade analyis: " ); + if ( CladeAnalysisTest.test() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Phylogeny reconstruction:" ); System.out.println(); if ( TestPhylogenyReconstruction.test( new File( PATH_TO_TEST_DATA ) ) ) { @@ -805,26 +928,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "EMBL Entry Retrieval: " ); - if ( Test.testEmblEntryRetrieval() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.out.print( "Uniprot Entry Retrieval: " ); - if ( Test.testUniprotEntryRetrieval() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.out.print( "Uniprot Taxonomy Search: " ); - if ( Test.testUniprotTaxonomySearch() ) { + System.out.print( "Genbank accessor parsing: " ); + if ( Test.testGenbankAccessorParsing() ) { System.out.println( "OK." ); succeeded++; } @@ -832,7 +937,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - //---- String path = ""; final String os = ForesterUtil.OS_NAME.toLowerCase(); if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) { @@ -842,13 +946,13 @@ public final class Test { path = "C:\\Program Files\\mafft-win\\mafft.bat"; } else { - path = "/home/czmasek/bin/mafft"; - } - if ( !MsaInferrer.isInstalled( path ) ) { path = "mafft"; - } - if ( !MsaInferrer.isInstalled( path ) ) { - path = "/usr/local/bin/mafft"; + if ( !MsaInferrer.isInstalled( path ) ) { + path = "/usr/bin/mafft"; + } + if ( !MsaInferrer.isInstalled( path ) ) { + path = "/usr/local/bin/mafft"; + } } if ( MsaInferrer.isInstalled( path ) ) { System.out.print( "MAFFT (external program): " ); @@ -860,7 +964,6 @@ public final class Test { System.out.println( "failed [will not count towards failed tests]" ); } } - //---- System.out.print( "Next nodes with collapsed: " ); if ( Test.testNextNodeWithCollapsing() ) { System.out.println( "OK." ); @@ -879,6 +982,128 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Deleteable MSA: " ); + if ( Test.testDeleteableMsa() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "MSA entropy: " ); + if ( Test.testMsaEntropy() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + if ( PERFORM_DB_TESTS ) { + System.out.print( "Uniprot Entry Retrieval: " ); + if ( Test.testUniprotEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Ebi Entry Retrieval: " ); + if ( Test.testEbiEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Sequence DB tools 2: " ); + if ( testSequenceDbWsTools2() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + System.exit( -1 ); + } + System.out.print( "Uniprot Taxonomy Search: " ); + if ( Test.testUniprotTaxonomySearch() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + } + if ( PERFORM_WEB_TREE_ACCESS ) { + System.out.print( "TreeBase acccess: " ); + if ( Test.testTreeBaseReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "ToL access: " ); + if ( Test.testToLReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "NHX parsing from URL: " ); + if ( Test.testNHXparsingFromURL() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "NHX parsing from URL 2: " ); + if ( Test.testNHXparsingFromURL2() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "phyloXML parsing from URL: " ); + if ( Test.testPhyloXMLparsingFromURL() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "TreeFam access: " ); + if ( Test.testTreeFamReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Pfam tree access: " ); + if ( Test.testPfamTreeReading() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + } System.out.println(); final Runtime rt = java.lang.Runtime.getRuntime(); final long free_memory = rt.freeMemory() / 1000000; @@ -897,139 +1122,1143 @@ public final class Test { } } - private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { - final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; - return p; - } - - private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { - return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); - } - - private static boolean testAminoAcidSequence() { + + private static boolean testEngulfingOverlapRemoval() { try { - final Sequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" ); - if ( aa1.getLength() != 13 ) { - return false; - } - if ( aa1.getResidueAt( 0 ) != 'A' ) { + final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.isEngulfed( d0, covered ) ) { return false; } - if ( aa1.getResidueAt( 2 ) != 'K' ) { - return false; - } - if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) { + if ( ForesterUtil.isEngulfed( d1, covered ) ) { return false; } - final Sequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" ); - if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZXXU" ) ) { + if ( ForesterUtil.isEngulfed( d2, covered ) ) { return false; } - final Sequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" ); - if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) { + if ( !ForesterUtil.isEngulfed( d3, covered ) ) { return false; } - final Sequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" ); - if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) { + if ( ForesterUtil.isEngulfed( d4, covered ) ) { return false; } - } - catch ( final Exception e ) { - e.printStackTrace(); - return false; - } - return true; - } - - private static boolean testBasicDomain() { - try { - final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); - if ( !pd.getDomainId().equals( "id" ) ) { + if ( ForesterUtil.isEngulfed( d5, covered ) ) { return false; } - if ( pd.getNumber() != 1 ) { + if ( !ForesterUtil.isEngulfed( d6, covered ) ) { return false; } - if ( pd.getTotalCount() != 4 ) { + final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Protein abc = new BasicProtein( "abc", "nemve", 0 ); + abc.addProteinDomain( a ); + abc.addProteinDomain( b ); + abc.addProteinDomain( c ); + final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc ); + final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc ); + if ( abc.getNumberOfProteinDomains() != 3 ) { return false; } - if ( !pd.equals( new BasicDomain( "id", 22, 111, ( short ) 1, ( short ) 4, 0.2, -12 ) ) ) { + if ( abc_r1.getNumberOfProteinDomains() != 3 ) { return false; } - final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); - final BasicDomain a1_copy = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); - final BasicDomain a1_equal = new BasicDomain( "a", 524, 743994, ( short ) 1, ( short ) 300, 3.0005, 230 ); - final BasicDomain a2 = new BasicDomain( "a", 1, 10, ( short ) 2, ( short ) 4, 0.1, -12 ); - final BasicDomain a3 = new BasicDomain( "A", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); - if ( !a1.equals( a1 ) ) { + if ( abc_r2.getNumberOfProteinDomains() != 2 ) { return false; } - if ( !a1.equals( a1_copy ) ) { + if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { return false; } - if ( !a1.equals( a1_equal ) ) { + if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { return false; } - if ( !a1.equals( a2 ) ) { + final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Protein def = new BasicProtein( "def", "nemve", 0 ); + def.addProteinDomain( d ); + def.addProteinDomain( e ); + def.addProteinDomain( f ); + final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def ); + final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def ); + if ( def.getNumberOfProteinDomains() != 3 ) { return false; } - if ( a1.equals( a3 ) ) { + if ( def_r1.getNumberOfProteinDomains() != 3 ) { return false; } - if ( a1.compareTo( a1 ) != 0 ) { + if ( def_r2.getNumberOfProteinDomains() != 3 ) { return false; } - if ( a1.compareTo( a1_copy ) != 0 ) { + if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { return false; } - if ( a1.compareTo( a1_equal ) != 0 ) { + if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { return false; } - if ( a1.compareTo( a2 ) != 0 ) { + if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { return false; } - if ( a1.compareTo( a3 ) == 0 ) { + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static final boolean testNHXparsingFromURL2() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; + final Phylogeny phys[] = AptxUtil + .readPhylogeniesFromUrl( new URL( s ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); + if ( ( phys == null ) || ( phys.length != 5 ) ) { + return false; + } + if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys[ 1 ].toNewHampshire() ); + return false; + } + final Phylogeny phys2[] = AptxUtil + .readPhylogeniesFromUrl( new URL( s ), false, false, false, TAXONOMY_EXTRACTION.NO, false ); + if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { + return false; + } + if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys2[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys2[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys2[ 1 ].toNewHampshire() ); + return false; + } + final Phylogeny phys3[] = AptxUtil.readPhylogeniesFromUrl( new URL( "http://swisstree.vital-it.ch:80/" + + "SwissTree/ST001/consensus_tree.nhx" ), + false, + false, + false, + TAXONOMY_EXTRACTION.NO, + false ); + if ( ( phys3 == null ) || ( phys3.length != 1 ) ) { + return false; + } + if ( !phys3[ 0 ].toNewHampshire() + .equals( "((((POP23a_CIOIN_ENSCING00000016202,POP23b_CIOIN_ENSCING00000016169),POP23_CIOSA_ENSCSAVG00000000248),((POP23a_BRAFL_C3ZMF1,POP23b_BRAFL_121417),(((POP3_ORYLA_ENSORLG00000019669,POP3_GASAC_ENSGACG00000014023,POP3_DANRE_Q6JWW1),(POP3_XENTR_B1H1F6,(POP3_CHICK_Q9DG25,(POP3_ORNAN_ENSOANG00000004179,POP3_MONDO_ENSMODG00000018033,((POP3_MOUSE_Q9ES81,POP3_RAT_Q3BCU3),POP3_RABIT_ENSOCUG00000025973,POP3_MACMU_ENSMMUG00000014473,POP3_HUMAN_Q9HBV1))))),(((POP2_GASAC_ENSGACG00000001420,POP2_ORYLA_ENSORLG00000008627,POP2_TAKRU_ENSTRUG00000015933),POP2_DANRE_ENSDARG00000069922),POP2_XENTR_ENSXETG00000018064,(((POP2_TAEGU_ENSTGUG00000013383,POP2_CHICK_Q6T9Z5),POP2_ANOCA_ENSACAG00000003557),((POP2_MACEU_ENSMEUG00000015825,POP2_MONDO_ENSMODG00000018205),((POP2_RABIT_ENSOCUG00000009515,(POP2_RAT_Q6P722,POP2_MOUSE_Q9ES82)),(POP2_MACMU_ENSMMUG00000000905,POP2_HUMAN_Q9HBU9)))))))),((POP1_CIOSA_ENSCSAVG00000000247,POP1_CIOIN_ENSCING00000000496),((POP1_DANRE_Q5PQZ7,(POP1_ORYLA_ENSORLG00000019663,POP1_GASAC_ENSGACG00000014015,POP1_TAKRU_ENSORLG00000019663)),(POP1_XENTR_B1H1G2,(POP1_ANOCA_ENSACAG00000003910,(POP1_TAEGU_ENSTGUG00000012218,POP1_CHICK_Q9DG23)),POP1_ORNAN_ENSOANG00000004180,POP1_MONDO_ENSMODG00000018034,(POP1_RABIT_ENSOCUG00000016944,(POP1_RAT_Q3BCU4,POP1_MOUSE_Q9ES83),(POP1_HUMAN_Q8NE79,POP1_MACMU_ENSMMUG00000014471))))));" ) ) { + System.out.println( phys3[ 0 ].toNewHampshire() ); + return false; + } + final Phylogeny phys4[] = AptxUtil.readPhylogeniesFromUrl( new URL( "http://swisstree.vital-it.ch:80/" + + "SwissTree/ST001/consensus_tree.nhx" ), + false, + false, + false, + TAXONOMY_EXTRACTION.NO, + false ); + if ( ( phys4 == null ) || ( phys4.length != 1 ) ) { + return false; + } + if ( !phys4[ 0 ].toNewHampshire() + .equals( "((((POP23a_CIOIN_ENSCING00000016202,POP23b_CIOIN_ENSCING00000016169),POP23_CIOSA_ENSCSAVG00000000248),((POP23a_BRAFL_C3ZMF1,POP23b_BRAFL_121417),(((POP3_ORYLA_ENSORLG00000019669,POP3_GASAC_ENSGACG00000014023,POP3_DANRE_Q6JWW1),(POP3_XENTR_B1H1F6,(POP3_CHICK_Q9DG25,(POP3_ORNAN_ENSOANG00000004179,POP3_MONDO_ENSMODG00000018033,((POP3_MOUSE_Q9ES81,POP3_RAT_Q3BCU3),POP3_RABIT_ENSOCUG00000025973,POP3_MACMU_ENSMMUG00000014473,POP3_HUMAN_Q9HBV1))))),(((POP2_GASAC_ENSGACG00000001420,POP2_ORYLA_ENSORLG00000008627,POP2_TAKRU_ENSTRUG00000015933),POP2_DANRE_ENSDARG00000069922),POP2_XENTR_ENSXETG00000018064,(((POP2_TAEGU_ENSTGUG00000013383,POP2_CHICK_Q6T9Z5),POP2_ANOCA_ENSACAG00000003557),((POP2_MACEU_ENSMEUG00000015825,POP2_MONDO_ENSMODG00000018205),((POP2_RABIT_ENSOCUG00000009515,(POP2_RAT_Q6P722,POP2_MOUSE_Q9ES82)),(POP2_MACMU_ENSMMUG00000000905,POP2_HUMAN_Q9HBU9)))))))),((POP1_CIOSA_ENSCSAVG00000000247,POP1_CIOIN_ENSCING00000000496),((POP1_DANRE_Q5PQZ7,(POP1_ORYLA_ENSORLG00000019663,POP1_GASAC_ENSGACG00000014015,POP1_TAKRU_ENSORLG00000019663)),(POP1_XENTR_B1H1G2,(POP1_ANOCA_ENSACAG00000003910,(POP1_TAEGU_ENSTGUG00000012218,POP1_CHICK_Q9DG23)),POP1_ORNAN_ENSOANG00000004180,POP1_MONDO_ENSMODG00000018034,(POP1_RABIT_ENSOCUG00000016944,(POP1_RAT_Q3BCU4,POP1_MOUSE_Q9ES83),(POP1_HUMAN_Q8NE79,POP1_MACMU_ENSMMUG00000014471))))));" ) ) { + System.out.println( phys4[ 0 ].toNewHampshire() ); + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static final boolean testNHXparsingFromURL() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; + final URL u = new URL( s ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u, new NHXParser() ); + if ( ( phys == null ) || ( phys.length != 5 ) ) { + return false; + } + if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys[ 1 ].toNewHampshire() ); + return false; + } + final URL u2 = new URL( s ); + final Phylogeny[] phys2 = factory.create( u2.openStream(), new NHXParser() ); + if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { + return false; + } + if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys2[ 0 ].toNewHampshire() ); + return false; + } + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final NHXParser p = new NHXParser(); + final URL u3 = new URL( s ); + p.setSource( u3 ); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.hasNext() ) { + return false; + } + p.reset(); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + return false; + } + p.reset(); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + return false; + } + } + catch ( final Exception e ) { + System.out.println( e.toString() ); + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) { + return false; + } + final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 1, -1 ); + final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, -1 ); + final Protein ab = new BasicProtein( "ab", "varanus", 0 ); + ab.addProteinDomain( a ); + ab.addProteinDomain( b ); + final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "b" ) ) { + return false; + } + final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s1.getNumberOfProteinDomains() != 2 ) { + return false; + } + final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain d = new BasicDomain( "d", + ( short ) 10000, + ( short ) 10500, + ( short ) 1, + ( short ) 1, + 0.0000001, + 1 ); + final Domain e = new BasicDomain( "e", + ( short ) 5000, + ( short ) 5500, + ( short ) 1, + ( short ) 1, + 0.0001, + 1 ); + final Protein cde = new BasicProtein( "cde", "varanus", 0 ); + cde.addProteinDomain( c ); + cde.addProteinDomain( d ); + cde.addProteinDomain( e ); + final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); + if ( cde.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( cde_s0.getNumberOfProteinDomains() != 3 ) { + return false; + } + final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); + final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); + final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); + fghi.addProteinDomain( f ); + fghi.addProteinDomain( g ); + fghi.addProteinDomain( h ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i2 ); + final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { + return false; + } + final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); + final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); + final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); + final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); + jklm.addProteinDomain( j ); + jklm.addProteinDomain( k ); + jklm.addProteinDomain( l ); + jklm.addProteinDomain( m ); + jklm.addProteinDomain( m0 ); + jklm.addProteinDomain( m1 ); + jklm.addProteinDomain( m2 ); + final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { + return false; + } + final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein od = new BasicProtein( "od", "varanus", 0 ); + od.addProteinDomain( only ); + final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); + if ( od.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( od_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static final boolean testPfamTreeReading() { + try { + final URL u = new URL( WebserviceUtil.PFAM_SERVER + "/family/PF" + "01849" + "/tree/download" ); + final NHXParser parser = new NHXParser(); + parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + parser.setReplaceUnderscores( false ); + parser.setGuessRootedness( true ); + final Phylogeny[] phys = ForesterUtil.readPhylogeniesFromUrl( u, parser ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + if ( phys[ 0 ].getNumberOfExternalNodes() < 10 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static final boolean testPhyloXMLparsingFromURL() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/archaeopteryx_a/apaf_bcl2.xml"; + final URL u = new URL( s ); + final Phylogeny[] phys = ForesterUtil.readPhylogeniesFromUrl( u, PhyloXmlParser.createPhyloXmlParser() ); + if ( ( phys == null ) || ( phys.length != 2 ) ) { + return false; + } + final Phylogeny[] phys2 = ForesterUtil.readPhylogeniesFromUrl( u, PhyloXmlParser.createPhyloXmlParser() ); + if ( ( phys2 == null ) || ( phys2.length != 2 ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static final boolean testToLReading() { + try { + final URL u = new URL( WebserviceUtil.TOL_URL_BASE + "15079" ); + final Phylogeny[] phys = ForesterUtil.readPhylogeniesFromUrl( u, new TolParser() ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + if ( !phys[ 0 ].getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "15079" ) ) { + return false; + } + if ( !phys[ 0 ].getRoot().getNodeData().getTaxonomy().getScientificName() + .equals( "Protacanthopterygii" ) ) { + return false; + } + if ( phys[ 0 ].getNumberOfExternalNodes() < 5 ) { return false; } + // + final URL u2 = new URL( WebserviceUtil.TOL_URL_BASE + "17706" ); + final Phylogeny[] phys2 = ForesterUtil.readPhylogeniesFromUrl( u2, new TolParser() ); + if ( ( phys2 == null ) || ( phys2.length != 1 ) ) { + return false; + } + if ( !phys2[ 0 ].getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "17706" ) ) { + return false; + } + if ( phys2[ 0 ].getNumberOfExternalNodes() < 5 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static final boolean testTreeBaseReading() { + try { + final URL u = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "72557?format=nexus" ); + final NexusPhylogeniesParser parser = new NexusPhylogeniesParser(); + parser.setReplaceUnderscores( true ); + final Phylogeny[] phys = ForesterUtil.readPhylogeniesFromUrl( u, parser ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + final URL u_1 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "2406?format=nexus" ); + final NexusPhylogeniesParser parser_1 = new NexusPhylogeniesParser(); + final Phylogeny[] phys_1 = ForesterUtil.readPhylogeniesFromUrl( u_1, parser_1 ); + if ( ( phys_1 == null ) || ( phys_1.length != 1 ) ) { + return false; + } + final URL u_2 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "422?format=nexus" ); + final NexusPhylogeniesParser parser_2 = new NexusPhylogeniesParser(); + final Phylogeny[] phys_2 = ForesterUtil.readPhylogeniesFromUrl( u_2, parser_2 ); + if ( ( phys_2 == null ) || ( phys_2.length != 1 ) ) { + return false; + } + final URL u_3 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "2654?format=nexus" ); + final NexusPhylogeniesParser parser_3 = new NexusPhylogeniesParser(); + final Phylogeny[] phys_3 = ForesterUtil.readPhylogeniesFromUrl( u_3, parser_3 ); + if ( ( phys_3 == null ) || ( phys_3.length != 1 ) ) { + return false; + } + final URL u_4 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_TREE_URL_BASE + "825?format=nexus" ); + final NexusPhylogeniesParser parser_4 = new NexusPhylogeniesParser(); + final Phylogeny[] phys_4 = ForesterUtil.readPhylogeniesFromUrl( u_4, parser_4 ); + if ( ( phys_4 == null ) || ( phys_4.length != 1 ) ) { + return false; + } + final URL u2 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + "15613?format=nexus" ); + final NexusPhylogeniesParser parser2 = new NexusPhylogeniesParser(); + parser2.setReplaceUnderscores( true ); + final Phylogeny[] phys2 = ForesterUtil.readPhylogeniesFromUrl( u2, parser2 ); + if ( ( phys2 == null ) || ( phys2.length != 9 ) ) { + return false; + } + final URL u3 = new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + "14909?format=nexus" ); + final NexusPhylogeniesParser parser3 = new NexusPhylogeniesParser(); + final Phylogeny[] phys3 = ForesterUtil.readPhylogeniesFromUrl( u3, parser3 ); + if ( ( phys3 == null ) || ( phys3.length != 2 ) ) { + return false; + } + final Phylogeny[] phys4 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "14525?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys4 == null ) || ( phys4.length != 1 ) ) { + return false; + } + final Phylogeny[] phys5 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "15632?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys5 == null ) || ( phys5.length != 1 ) ) { + return false; + } + final Phylogeny[] phys6 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "10190?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys6 == null ) || ( phys6.length != 1 ) ) { + return false; + } + final Phylogeny[] phys7 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "13246?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys7 == null ) || ( phys7.length != 2 ) ) { + return false; + } + final Phylogeny[] phys8 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "11662?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys8 == null ) || ( phys8.length != 2 ) ) { + return false; + } + final Phylogeny[] phys9 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "562?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys9 == null ) || ( phys9.length != 4 ) ) { + return false; + } + final Phylogeny[] phys16424 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "16424?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys16424 == null ) || ( phys16424.length != 1 ) ) { + return false; + } + final Phylogeny[] phys17878 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "17878?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys17878 == null ) || ( phys17878.length != 17 ) ) { + return false; + } + final Phylogeny[] phys18804 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "18804?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys18804 == null ) || ( phys18804.length != 2 ) ) { + return false; + } + final Phylogeny[] phys346 = ForesterUtil.readPhylogeniesFromUrl( + new URL( WebserviceUtil.TREEBASE_PHYLOWS_STUDY_URL_BASE + + "346?format=nexus" ), + new NexusPhylogeniesParser() ); + if ( ( phys346 == null ) || ( phys346.length != 1 ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static final boolean testTreeFamReading() { + try { + final URL u = new URL( WebserviceUtil.TREE_FAM_URL_BASE + "101004" + "/tree/newick" ); + final NHXParser parser = new NHXParser(); + parser.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); + parser.setReplaceUnderscores( false ); + parser.setGuessRootedness( true ); + final Phylogeny[] phys = ForesterUtil.readPhylogeniesFromUrl( u, parser ); + if ( ( phys == null ) || ( phys.length != 1 ) ) { + return false; + } + if ( phys[ 0 ].getNumberOfExternalNodes() < 10 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { + final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; + return p; + } + + private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { + return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); + } + + private static boolean testAminoAcidSequence() { + try { + final MolecularSequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" ); + if ( aa1.getLength() != 13 ) { + return false; + } + if ( aa1.getResidueAt( 0 ) != 'A' ) { + return false; + } + if ( aa1.getResidueAt( 2 ) != 'K' ) { + return false; + } + if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) { + return false; + } + final MolecularSequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" ); + if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZOXU" ) ) { + return false; + } + final MolecularSequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" ); + if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) { + return false; + } + final MolecularSequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" ); + if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testBasicDomain() { + try { + final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); + if ( !pd.getDomainId().equals( "id" ) ) { + return false; + } + if ( pd.getNumber() != 1 ) { + return false; + } + if ( pd.getTotalCount() != 4 ) { + return false; + } + if ( !pd.equals( new BasicDomain( "id", 22, 111, ( short ) 1, ( short ) 4, 0.2, -12 ) ) ) { + return false; + } + final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); + final BasicDomain a1_copy = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); + final BasicDomain a1_equal = new BasicDomain( "a", 524, 743994, ( short ) 1, ( short ) 300, 3.0005, 230 ); + final BasicDomain a2 = new BasicDomain( "a", 1, 10, ( short ) 2, ( short ) 4, 0.1, -12 ); + final BasicDomain a3 = new BasicDomain( "A", 1, 10, ( short ) 1, ( short ) 4, 0.1, -12 ); + if ( !a1.equals( a1 ) ) { + return false; + } + if ( !a1.equals( a1_copy ) ) { + return false; + } + if ( !a1.equals( a1_equal ) ) { + return false; + } + if ( !a1.equals( a2 ) ) { + return false; + } + if ( a1.equals( a3 ) ) { + return false; + } + if ( a1.compareTo( a1 ) != 0 ) { + return false; + } + if ( a1.compareTo( a1_copy ) != 0 ) { + return false; + } + if ( a1.compareTo( a1_equal ) != 0 ) { + return false; + } + if ( a1.compareTo( a2 ) != 0 ) { + return false; + } + if ( a1.compareTo( a3 ) == 0 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicNodeMethods() { + try { + if ( PhylogenyNode.getNodeCount() != 0 ) { + return false; + } + final PhylogenyNode n1 = new PhylogenyNode(); + final PhylogenyNode n2 = PhylogenyNode + .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + final PhylogenyNode n3 = PhylogenyNode + .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + final PhylogenyNode n4 = PhylogenyNode + .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + if ( n1.isHasAssignedEvent() ) { + return false; + } + if ( PhylogenyNode.getNodeCount() != 4 ) { + return false; + } + if ( n3.getIndicator() != 0 ) { + return false; + } + if ( n3.getNumberOfExternalNodes() != 1 ) { + return false; + } + if ( !n3.isExternal() ) { + return false; + } + if ( !n3.isRoot() ) { + return false; + } + if ( !n4.getName().equals( "n4" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + + private static boolean testCommonPrefix() { + final List l0 = new ArrayList(); + l0.add( "abc" ); + if ( !ForesterUtil.greatestCommonPrefix( l0 ).equals( "abc" ) ) { + return false; + } + + final List l1 = new ArrayList(); + l1.add( "abc" ); + l1.add( "abX" ); + if ( !ForesterUtil.greatestCommonPrefix( l1 ).equals( "ab" ) ) { + return false; + } + + final List l2 = new ArrayList(); + l2.add( "abc" ); + l2.add( "abX" ); + l2.add( "axy" ); + if ( !ForesterUtil.greatestCommonPrefix( l2 ).equals( "a" ) ) { + return false; + } + + final List l3 = new ArrayList(); + l3.add( "abXsdfsdfsdfsdfsdfsd" ); + l3.add( "abXsdfsdfsdfsdfsdfsd" ); + l3.add( "abc" ); + l3.add( "abXsdfsdfsdfsdfsdfsd" ); + l3.add( "ab" ); + l3.add( "abc" ); + l3.add( "ab" ); + if ( !ForesterUtil.greatestCommonPrefix( l3 ).equals( "ab" ) ) { + return false; + } + + final List l4 = new ArrayList(); + l4.add( "abXsdfsdfsdfsdfsdfsd" ); + l4.add( "abXsdfsdfsdfsdfsdfsd" ); + l4.add( "abc" ); + l4.add( "Xsdfsdfsdfsdfsdfsd" ); + l4.add( "ab" ); + l4.add( "abc" ); + if ( !ForesterUtil.greatestCommonPrefix( l4 ).equals( "" ) ) { + return false; } - catch ( final Exception e ) { - e.printStackTrace( System.out ); + + final List l5 = new ArrayList(); + l5.add( "" ); + if ( !ForesterUtil.greatestCommonPrefix( l5 ).equals( "" ) ) { + return false; + } + + final List l6 = new ArrayList(); + l6.add( "abc" ); + l6.add( "abX" ); + l6.add( "" ); + if ( !ForesterUtil.greatestCommonPrefix( l6 ).equals( "" ) ) { + return false; + } + return true; + } + + private static boolean testCommonPrefixSep() { + final List l0 = new ArrayList(); + l0.add( "a.b.c" ); + if ( !ForesterUtil.greatestCommonPrefix( l0, ".").equals( "a.b.c" ) ) { + return false; + } + + final List l1 = new ArrayList(); + l1.add( "a.b.c" ); + l1.add( "a.b.X" ); + if ( !ForesterUtil.greatestCommonPrefix( l1 , ".").equals( "a.b" ) ) { + return false; + } + + final List l2 = new ArrayList(); + l2.add( "a.b.c." ); + l2.add( "a.b.X." ); + l2.add( "a.x.y." ); + if ( !ForesterUtil.greatestCommonPrefix( l2, ".").equals( "a" ) ) { + return false; + } + + final List l3 = new ArrayList(); + l3.add( "a/b/X/s/d/f/s/d/f/s/d/f/s/d/f/s/d/f/s/d/" ); + l3.add( "a/b/X/s/d/f/s/d/f/s/d/f/s/d/f/s/d/f/s/d" ); + l3.add( "a/b/c" ); + l3.add( "a/b/X/s/d/f/s/d/f/s/d/f/s/d/f/s/d/f/s/d/" ); + l3.add( "a/b/" ); + l3.add( "a/b/c/" ); + l3.add( "a/b////////" ); + if ( !ForesterUtil.greatestCommonPrefix( l3, "/" ).equals( "a/b" ) ) { + return false; + } + + final List l4 = new ArrayList(); + l4.add( "a.b.X.s.d.f.s.d.f.s.d.f.s.d.f.s.d.f.s.d" ); + l4.add( "a.b.X.s.d.f.s.d.f.s.d.f.s.d.f.s.d.f.s.d" ); + l4.add( "a.b.c" ); + l4.add( "X.s.d.f.s.d.f.s.d.f.s.d.f.s.d.f.s.d..." ); + l4.add( "a.b" ); + l4.add( "a.b.c" ); + if ( !ForesterUtil.greatestCommonPrefix( l4, "." ).equals( "" ) ) { + return false; + } + + final List l5 = new ArrayList(); + l5.add( "" ); + if ( !ForesterUtil.greatestCommonPrefix( l5, "_" ).equals( "" ) ) { + return false; + } + + final List l6 = new ArrayList(); + l6.add( "_" ); + l6.add( "__" ); + if ( !ForesterUtil.greatestCommonPrefix( l6, "_" ).equals( "" ) ) { + return false; + } + + final List l7 = new ArrayList(); + l7.add( "a,b,c" ); + l7.add( "a,b,X" ); + l7.add( "" ); + l7.add( ",,,,,,,,,," ); + if ( !ForesterUtil.greatestCommonPrefix( l7, "," ).equals( "" ) ) { + return false; + } + + final List l8 = new ArrayList(); + l8.add( "123.304.403.04" ); + l8.add( "123.304.403.04.02" ); + l8.add( "123.304.403.03.03" ); + if ( !ForesterUtil.greatestCommonPrefix( l8, "." ).equals( "123.304.403" ) ) { + return false; + } + + final List l9 = new ArrayList(); + l9.add( "123.304.403.04" ); + l9.add( "123.304.403.04.02" ); + l9.add( "123.304.402.03.03" ); + if ( !ForesterUtil.greatestCommonPrefix( l9, "." ).equals( "123.304" ) ) { return false; } return true; } - private static boolean testBasicNodeMethods() { + + private static boolean testUTF8ParsingFromFile() { try { - if ( PhylogenyNode.getNodeCount() != 0 ) { + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); + final Phylogeny[] phylogenies_xml = ParserBasedPhylogenyFactory.getInstance() + .create( new File( Test.PATH_TO_TEST_DATA + "chars.xml" ), xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); return false; } - final PhylogenyNode n1 = new PhylogenyNode(); - final PhylogenyNode n2 = PhylogenyNode - .createInstanceFromNhxString( "", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); - final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "n3", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); - final PhylogenyNode n4 = PhylogenyNode - .createInstanceFromNhxString( "n4:0.01", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); - if ( n1.isHasAssignedEvent() ) { + if ( phylogenies_xml.length != 1 ) { return false; } - if ( PhylogenyNode.getNodeCount() != 4 ) { + final Phylogeny[] phylogenies_xml2 = ParserBasedPhylogenyFactory.getInstance() + .create( new StringBuffer( phylogenies_xml[ 0 ].toPhyloXML( 0 ) ), xml_parser ); + final Phylogeny[] phylogenies_nh = ParserBasedPhylogenyFactory.getInstance() + .create( new File( Test.PATH_TO_TEST_DATA + "chars.nh" ), new NHXParser() ); + if ( phylogenies_nh.length != 1 ) { return false; } - if ( n3.getIndicator() != 0 ) { + final Phylogeny[] phylogenies_nex = ParserBasedPhylogenyFactory.getInstance() + .create( new File( Test.PATH_TO_TEST_DATA + "chars.nex" ), new NexusPhylogeniesParser() ); + if ( phylogenies_nex.length != 1 ) { return false; } - if ( n3.getNumberOfExternalNodes() != 1 ) { + final String[] xml_n = phylogenies_xml[ 0 ].getAllExternalNodeNames(); + final String[] xml_n2 = phylogenies_xml2[ 0 ].getAllExternalNodeNames(); + final String[] nh_n = phylogenies_nh[ 0 ].getAllExternalNodeNames(); + final String[] nex_n = phylogenies_nex[ 0 ].getAllExternalNodeNames(); + final String n0 = "AQ~!@#$%^&*()_+-=\\{}|;:\"<>?,./"; + final String n1 = "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ"; + final String n2 = "漢字ひらがなカタカナ"; + final String n3 = "อักษรไทย"; + final String n4 = "繁體字"; + final String n5 = "한글"; + final String n6 = "देवनागरी"; + final String n7 = "chữ Quốc ngữ"; + final String n8 = "ру́сский язы́к"; + final String n9 = "អក្សរខ្មែរ"; + if ( !xml_n[ 0 ].equals( n0 ) ) { + System.out.println( xml_n[ 0 ] ); + System.out.println( n0 ); return false; } - if ( !n3.isExternal() ) { + if ( !xml_n2[ 0 ].equals( n0 ) ) { + System.out.println( xml_n2[ 0 ] ); + System.out.println( n0 ); return false; } - if ( !n3.isRoot() ) { + if ( !nh_n[ 0 ].equals( n0 ) ) { + System.out.println( nh_n[ 0 ] ); + System.out.println( n0 ); return false; } - if ( !n4.getName().equals( "n4" ) ) { + if ( !nex_n[ 0 ].equals( n0 ) ) { + System.out.println( nex_n[ 0 ] ); + System.out.println( n0 ); + return false; + } + if ( !xml_n[ 1 ].equals( n1 ) ) { + System.out.println( xml_n[ 1 ] ); + System.out.println( n1 ); + return false; + } + if ( !xml_n2[ 1 ].equals( n1 ) ) { + System.out.println( xml_n2[ 1 ] ); + System.out.println( n1 ); + return false; + } + if ( !nh_n[ 1 ].equals( n1 ) ) { + System.out.println( nh_n[ 1 ] ); + System.out.println( n1 ); + return false; + } + if ( !nex_n[ 1 ].equals( n1 ) ) { + System.out.println( nex_n[ 1 ] ); + System.out.println( n1 ); + return false; + } + if ( !xml_n[ 2 ].equals( n2 ) ) { + System.out.println( xml_n[ 2 ] ); + System.out.println( n2 ); + return false; + } + if ( !xml_n2[ 2 ].equals( n2 ) ) { + System.out.println( xml_n2[ 2 ] ); + System.out.println( n2 ); + return false; + } + if ( !nh_n[ 2 ].equals( n2 ) ) { + System.out.println( nh_n[ 2 ] ); + System.out.println( n2 ); + return false; + } + if ( !nex_n[ 2 ].equals( n2 ) ) { + System.out.println( nex_n[ 2 ] ); + System.out.println( n2 ); + return false; + } + // + if ( !xml_n[ 3 ].equals( n3 ) ) { + System.out.println( xml_n[ 3 ] ); + System.out.println( n3 ); + return false; + } + if ( !xml_n2[ 3 ].equals( n3 ) ) { + System.out.println( xml_n2[ 3 ] ); + System.out.println( n3 ); + return false; + } + if ( !nh_n[ 3 ].equals( n3 ) ) { + System.out.println( nh_n[ 3 ] ); + System.out.println( n3 ); + return false; + } + if ( !nex_n[ 3 ].equals( n3 ) ) { + System.out.println( nex_n[ 3 ] ); + System.out.println( n3 ); + return false; + } + // + if ( !xml_n[ 4 ].equals( n4 ) ) { + System.out.println( xml_n[ 4 ] ); + System.out.println( n4 ); + return false; + } + if ( !nh_n[ 4 ].equals( n4 ) ) { + System.out.println( nh_n[ 4 ] ); + System.out.println( n4 ); + return false; + } + if ( !nex_n[ 4 ].equals( n4 ) ) { + System.out.println( nex_n[ 4 ] ); + System.out.println( n4 ); + return false; + } + // + if ( !xml_n[ 5 ].equals( n5 ) ) { + System.out.println( xml_n[ 5 ] ); + System.out.println( n5 ); + return false; + } + if ( !nh_n[ 5 ].equals( n5 ) ) { + System.out.println( nh_n[ 5 ] ); + System.out.println( n5 ); + return false; + } + if ( !nex_n[ 5 ].equals( n5 ) ) { + System.out.println( nex_n[ 5 ] ); + System.out.println( n5 ); + return false; + } + // + if ( !xml_n[ 6 ].equals( n6 ) ) { + System.out.println( xml_n[ 6 ] ); + System.out.println( n6 ); + return false; + } + if ( !nh_n[ 6 ].equals( n6 ) ) { + System.out.println( nh_n[ 6 ] ); + System.out.println( n6 ); + return false; + } + if ( !nex_n[ 6 ].equals( n6 ) ) { + System.out.println( nex_n[ 6 ] ); + System.out.println( n6 ); + return false; + } + // + if ( !xml_n[ 7 ].equals( n7 ) ) { + System.out.println( xml_n[ 7 ] ); + System.out.println( n7 ); + return false; + } + if ( !nh_n[ 7 ].equals( n7 ) ) { + System.out.println( nh_n[ 7 ] ); + System.out.println( n7 ); + return false; + } + if ( !nex_n[ 7 ].equals( n7 ) ) { + System.out.println( nex_n[ 7 ] ); + System.out.println( n7 ); + return false; + } + if ( !xml_n[ 8 ].equals( n8 ) ) { + System.out.println( xml_n[ 8 ] ); + System.out.println( n8 ); + return false; + } + if ( !nh_n[ 8 ].equals( n8 ) ) { + System.out.println( nh_n[ 8 ] ); + System.out.println( n8 ); + return false; + } + if ( !nex_n[ 8 ].equals( n8 ) ) { + System.out.println( nex_n[ 8 ] ); + System.out.println( n8 ); + return false; + } + if ( !xml_n[ 9 ].equals( n9 ) ) { + System.out.println( xml_n[ 9 ] ); + System.out.println( n9 ); + return false; + } + if ( !xml_n2[ 9 ].equals( n9 ) ) { + System.out.println( xml_n2[ 9 ] ); + System.out.println( n9 ); + return false; + } + if ( !nh_n[ 9 ].equals( n9 ) ) { + System.out.println( nh_n[ 9 ] ); + System.out.println( n9 ); + return false; + } + if ( !nex_n[ 9 ].equals( n9 ) ) { + System.out.println( nex_n[ 9 ] ); + System.out.println( n9 ); + return false; + } + if ( !phylogenies_xml[ 0 ].toNewHampshire().equals( phylogenies_nh[ 0 ].toNewHampshire() ) ) { + System.out.println( phylogenies_xml[ 0 ].toNewHampshire() ); + System.out.println( phylogenies_nh[ 0 ].toNewHampshire() ); + return false; + } + if ( !phylogenies_xml[ 0 ].toNewHampshire().equals( phylogenies_nex[ 0 ].toNewHampshire() ) ) { + System.out.println( phylogenies_xml[ 0 ].toNewHampshire() ); + System.out.println( phylogenies_nex[ 0 ].toNewHampshire() ); return false; } } @@ -1043,9 +2272,9 @@ public final class Test { private static boolean testBasicPhyloXMLparsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = new PhyloXmlParser(); - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", - xml_parser ); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); + final Phylogeny[] phylogenies_0 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; @@ -1119,6 +2348,21 @@ public final class Test { if ( !t3.getIdentifier().getProvider().equals( "treebank" ) ) { return false; } + if ( !t3.getNode( "root node" ).isDuplication() ) { + return false; + } + if ( !t3.getNode( "node a" ).isDuplication() ) { + return false; + } + if ( t3.getNode( "node a" ).isSpeciation() ) { + return false; + } + if ( t3.getNode( "node bc" ).isDuplication() ) { + return false; + } + if ( !t3.getNode( "node bc" ).isSpeciation() ) { + return false; + } if ( !t3.getNode( "root node" ).getNodeData().getSequence().getType().equals( "protein" ) ) { return false; } @@ -1132,7 +2376,8 @@ public final class Test { if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { return false; } - if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource().equals( "UniProtKB" ) ) { + if ( !t3.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource() + .equals( "UniProtKB" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() @@ -1159,8 +2404,8 @@ public final class Test { .getValue() != 1 ) { return false; } - if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence() - .getType().equals( "ml" ) ) { + if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getConfidence().getType() + .equals( "ml" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getDesc() @@ -1168,27 +2413,27 @@ public final class Test { return false; } if ( ( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { + .getProperties( "AFFY:expression" ).get( 0 ).getAppliesTo() != AppliesTo.ANNOTATION ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getDataType().equals( "xsd:double" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getRef().equals( "AFFY:expression" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getUnit().equals( "AFFY:x" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getValue().equals( "0.2" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { + .getProperties( "MED:disease" ).get( 0 ).getValue().equals( "lymphoma" ) ) { return false; } if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() @@ -1236,15 +2481,15 @@ public final class Test { private static boolean testBasicPhyloXMLparsingRoundtrip() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = new PhyloXmlParser(); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } else { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", - xml_parser ); + final Phylogeny[] phylogenies_0 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; @@ -1323,7 +2568,8 @@ public final class Test { if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getSymbol().equals( "BCL2L14" ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue().equals( "Q9BZR8" ) ) { + if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getValue() + .equals( "Q9BZR8" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getSequence().getAccession().getSource() @@ -1363,27 +2609,27 @@ public final class Test { return false; } if ( ( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getAppliesTo() != AppliesTo.ANNOTATION ) { + .getProperties( "AFFY:expression" ).get( 0 ).getAppliesTo() != AppliesTo.ANNOTATION ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getDataType().equals( "xsd:double" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getDataType().equals( "xsd:double" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getRef().equals( "AFFY:expression" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getRef().equals( "AFFY:expression" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getUnit().equals( "AFFY:x" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getUnit().equals( "AFFY:x" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "AFFY:expression" ).getValue().equals( "0.2" ) ) { + .getProperties( "AFFY:expression" ).get( 0 ).getValue().equals( "0.2" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 2 ) ).getProperties() - .getProperty( "MED:disease" ).getValue().equals( "lymphoma" ) ) { + .getProperties( "MED:disease" ).get( 0 ).getValue().equals( "lymphoma" ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getAnnotation( 1 ) ).getRef() @@ -1394,7 +2640,8 @@ public final class Test { .equals( "intracellular organelle" ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType().equals( "source" ) ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getType() + .equals( "source" ) ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getUri( 0 ).getDescription() @@ -1404,11 +2651,13 @@ public final class Test { if ( !( t3_rt.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } - if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDoi().equals( "10.1038/387489a0" ) ) ) { + if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDoi() + .equals( "10.1038/387489a0" ) ) ) { return false; } if ( !( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() .equals( "Aguinaldo, A. M. A.; J. M. Turbeville, L. S. Linford, M. C. Rivera, J. R. Garey, R. A. Raff, & J. A. Lake (1997). \"Evidence for a clade of nematodes, arthropods and other moulting animals\". Nature 387 (6632): 489–493." ) ) ) { + System.out.println( t3_rt.getNode( "root node" ).getNodeData().getReference().getDescription() ); return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getTaxonomyCode().equals( "ECDYS" ) ) { @@ -1417,7 +2666,8 @@ public final class Test { if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getScientificName().equals( "ecdysozoa" ) ) { return false; } - if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName().equals( "molting animals" ) ) { + if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getCommonName() + .equals( "molting animals" ) ) { return false; } if ( !t3_rt.getNode( "root node" ).getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { @@ -1427,7 +2677,8 @@ public final class Test { .equals( "ncbi" ) ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getTotalLength() != 124 ) { + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture() + .getTotalLength() != 124 ) { return false; } if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) @@ -1438,7 +2689,8 @@ public final class Test { .getFrom() != 21 ) { return false; } - if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getTo() != 44 ) { + if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) + .getTo() != 44 ) { return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) @@ -1446,7 +2698,7 @@ public final class Test { return false; } if ( t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ) - .getConfidence() != 2144 ) { + .getConfidence() != 0 ) { return false; } if ( !t3_rt.getNode( "node bc" ).getNodeData().getSequence().getDomainArchitecture().getDomain( 0 ).getId() @@ -1527,7 +2779,6 @@ public final class Test { if ( !t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) { return false; } - // if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getDesc().equals( "Silurian" ) ) { return false; } @@ -1535,7 +2786,8 @@ public final class Test { .equalsIgnoreCase( "435" ) ) { return false; } - if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMin().toPlainString().equalsIgnoreCase( "416" ) ) { + if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMin().toPlainString() + .equalsIgnoreCase( "416" ) ) { return false; } if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getMax().toPlainString() @@ -1588,7 +2840,7 @@ public final class Test { // Do nothing -- means were not running from jar. } if ( xml_parser == null ) { - xml_parser = new PhyloXmlParser(); + xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } @@ -1596,8 +2848,8 @@ public final class Test { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", - xml_parser ); + final Phylogeny[] phylogenies_0 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; @@ -1631,7 +2883,7 @@ public final class Test { return false; } final String x2 = Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml"; - final Phylogeny[] phylogenies_1 = factory.create( x2, xml_parser ); + final Phylogeny[] phylogenies_1 = factory.create( new File( x2 ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( "errors:" ); System.out.println( xml_parser.getErrorMessages().toString() ); @@ -1640,8 +2892,8 @@ public final class Test { if ( phylogenies_1.length != 4 ) { return false; } - final Phylogeny[] phylogenies_2 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml", - xml_parser ); + final Phylogeny[] phylogenies_2 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t3.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( "errors:" ); System.out.println( xml_parser.getErrorMessages().toString() ); @@ -1653,8 +2905,8 @@ public final class Test { if ( phylogenies_2[ 0 ].getNumberOfExternalNodes() != 2 ) { return false; } - final Phylogeny[] phylogenies_3 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml", - xml_parser ); + final Phylogeny[] phylogenies_3 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_t4.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; @@ -1675,8 +2927,8 @@ public final class Test { if ( !a.getNode( "node b1" ).getNodeData().getTaxonomy().getCommonName().equals( "b1 species" ) ) { return false; } - final Phylogeny[] phylogenies_4 = factory.create( Test.PATH_TO_TEST_DATA + "special_characters.xml", - xml_parser ); + final Phylogeny[] phylogenies_4 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "special_characters.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; @@ -1702,6 +2954,42 @@ public final class Test { return true; } + private static boolean testPhyloXMLparsingValidating() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + PhyloXmlParser xml_parser = null; + try { + xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); + } + catch ( final Exception e ) { + // Do nothing -- means were not running from jar. + } + if ( xml_parser == null ) { + xml_parser = PhyloXmlParser.createPhyloXmlParser(); + if ( USE_LOCAL_PHYLOXML_SCHEMA ) { + xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); + } + else { + xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); + } + } + final Phylogeny[] phylogenies_0 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_test_1.xml" ), xml_parser ); + if ( xml_parser.getErrorCount() > 0 ) { + System.out.println( xml_parser.getErrorMessages().toString() ); + return false; + } + if ( phylogenies_0.length != 3 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + private static boolean testBasicProtein() { try { final BasicProtein p0 = new BasicProtein( "p0", "owl", 0 ); @@ -2150,7 +3438,8 @@ public final class Test { if ( !t1.getRoot().getNodeData().getTaxonomy().getIdentifier().getValue().equals( "2484" ) ) { return false; } - if ( !t1.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Rhombozoa" ) ) { + if ( !t1.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName() + .equals( "Rhombozoa" ) ) { return false; } if ( t1.getRoot().getChildNode( 0 ).getNumberOfDescendants() != 3 ) { @@ -2183,7 +3472,8 @@ public final class Test { if ( t2.getRoot().getNumberOfDescendants() != 24 ) { return false; } - if ( !t2.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName().equals( "Aquificae" ) ) { + if ( !t2.getRoot().getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName() + .equals( "Aquificae" ) ) { return false; } if ( !t2.getRoot().getChildNode( 0 ).getChildNode( 0 ).getNodeData().getTaxonomy().getScientificName() @@ -2264,15 +3554,11 @@ public final class Test { private static boolean testBasicTreeMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t1 = factory.create(); - if ( !t1.isEmpty() ) { - return false; - } final Phylogeny t2 = factory.create( "((A:1,B:2)AB:1,(C:3,D:5)CD:3)ABCD:0.5", new NHXParser() )[ 0 ]; if ( t2.getNumberOfExternalNodes() != 4 ) { return false; } - if ( t2.getHeight() != 8.5 ) { + if ( t2.calculateHeight( false ) != 8.5 ) { return false; } if ( !t2.isCompletelyBinary() ) { @@ -2285,57 +3571,58 @@ public final class Test { if ( t3.getNumberOfExternalNodes() != 5 ) { return false; } - if ( t3.getHeight() != 11 ) { + if ( t3.calculateHeight( true ) != 11 ) { return false; } if ( t3.isCompletelyBinary() ) { return false; } final PhylogenyNode n = t3.getNode( "ABC" ); - final Phylogeny t4 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3,(F,G,H,I))", new NHXParser() )[ 0 ]; + final Phylogeny t4 = factory.create( "((A:1,B:2,C:10)ABC:1,(D:3,E:5)DE:3,(F,G,H,I))", + new NHXParser() )[ 0 ]; if ( t4.getNumberOfExternalNodes() != 9 ) { return false; } - if ( t4.getHeight() != 11 ) { + if ( t4.calculateHeight( false ) != 11 ) { return false; } if ( t4.isCompletelyBinary() ) { return false; } final StringBuffer sb5 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); - final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; + final Phylogeny t5 = factory.create( sb5.toString(), new NHXParser() )[ 0 ]; if ( t5.getNumberOfExternalNodes() != 8 ) { return false; } - if ( t5.getHeight() != 15 ) { + if ( t5.calculateHeight( false ) != 15 ) { return false; } final StringBuffer sb6 = new StringBuffer( "(X,Y,Z,(((A111)A11:2)A1:2,(X,Y,Z,A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:3,D:8)" ); - final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; - if ( t6.getHeight() != 15 ) { + final Phylogeny t6 = factory.create( sb6.toString(), new NHXParser() )[ 0 ]; + if ( t6.calculateHeight( true ) != 15 ) { return false; } final StringBuffer sb7 = new StringBuffer( "(((A11:2)A1:2,(A21:1,A22:2,A23)A2:11,A3:2)A:2,B:10,C:15,D:8)" ); - final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; - if ( t7.getHeight() != 15 ) { + final Phylogeny t7 = factory.create( sb7.toString(), new NHXParser() )[ 0 ]; + if ( t7.calculateHeight( true ) != 15 ) { return false; } final StringBuffer sb8 = new StringBuffer( "(((A11:11)A1:2,(A21:2,A22:2,A23,A24,AA:)A2:11,A3:2)A:2,B:15,C:15,D:15)" ); - final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; + final Phylogeny t8 = factory.create( sb8.toString(), new NHXParser() )[ 0 ]; if ( t8.getNumberOfExternalNodes() != 10 ) { return false; } - if ( t8.getHeight() != 15 ) { + if ( t8.calculateHeight( true ) != 15 ) { return false; } final char[] a9 = new char[] { 'a' }; final Phylogeny t9 = factory.create( a9, new NHXParser() )[ 0 ]; - if ( t9.getHeight() != 0 ) { + if ( t9.calculateHeight( true ) != 0 ) { return false; } final char[] a10 = new char[] { 'a', ':', '6' }; final Phylogeny t10 = factory.create( a10, new NHXParser() )[ 0 ]; - if ( t10.getHeight() != 6 ) { + if ( t10.calculateHeight( true ) != 6 ) { return false; } } @@ -2350,9 +3637,9 @@ public final class Test { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev0 = factory - .create( "((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);", - new NHXParser() ); + final Phylogeny[] ev0 = factory.create( + "((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);((((A,B),C),D),E);", + new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev0, t0, false, 1, 0, 2 ); if ( !isEqual( t0.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 3 ) ) { return false; @@ -2361,9 +3648,9 @@ public final class Test { return false; } final Phylogeny t1 = factory.create( "((((A,B)ab[&&NHX:B=50],C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev1 = factory - .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", - new NHXParser() ); + final Phylogeny[] ev1 = factory.create( + "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", + new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev1, t1, false, 1 ); if ( !isEqual( t1.getNode( "ab" ).getBranchData().getConfidence( 1 ).getValue(), 7 ) ) { return false; @@ -2372,9 +3659,9 @@ public final class Test { return false; } final Phylogeny t_b = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev_b = factory - .create( "((A,C),X);((A,X),C);(A,C);((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", - new NHXParser() ); + final Phylogeny[] ev_b = factory.create( + "((A,C),X);((A,X),C);(A,C);((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", + new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev_b, t_b, false, 1 ); if ( !isEqual( t_b.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 4 ) ) { return false; @@ -2384,9 +3671,9 @@ public final class Test { } // final Phylogeny t1x = factory.create( "((((A,B)ab,C)abc,D)abcd,E)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev1x = factory - .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", - new NHXParser() ); + final Phylogeny[] ev1x = factory.create( + "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));", + new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev1x, t1x, true, 1 ); if ( !isEqual( t1x.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue(), 7 ) ) { return false; @@ -2395,9 +3682,9 @@ public final class Test { return false; } final Phylogeny t_bx = factory.create( "((((A,C)ac,D)acd,E)acde,B)abcde", new NHXParser() )[ 0 ]; - final Phylogeny[] ev_bx = factory - .create( "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", - new NHXParser() ); + final Phylogeny[] ev_bx = factory.create( + "((((A,B),C),D),E);((A,B),((E,D),C));(((A,B),C),(E,D));(A,(((E,D),C),B));(B,(A,((E,D),C)));(C,((E,D),(A,B)));(D,(E,((A,B),C)));((((A,C)ac,D)acd,E)acde,B)abcd", + new NHXParser() ); ConfidenceAssessor.evaluate( "bootstrap", ev_bx, t_bx, true, 1 ); if ( !isEqual( t_bx.getNode( "ac" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; @@ -2405,17 +3692,15 @@ public final class Test { if ( !isEqual( t_bx.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) { return false; } - // - final Phylogeny[] t2 = factory - .create( "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);", - new NHXParser() ); - final Phylogeny[] ev2 = factory - .create( "((((a,b),c),d),e);((((a,b),c),d),e);((((a,b),e),d),c);((((a,b),e),d),c);(((a,b),(c,d)),e);((a,b),x);((a,b),(x,y));(a,b);(a,e);(a,b,c);", - new NHXParser() ); + final Phylogeny[] t2 = factory.create( + "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);", + new NHXParser() ); + final Phylogeny[] ev2 = factory.create( + "((((a,b),c),d),e);((((a,b),c),d),e);((((a,b),e),d),c);((((a,b),e),d),c);(((a,b),(c,d)),e);((a,b),x);((a,b),(x,y));(a,b);(a,e);(a,b,c);", + new NHXParser() ); for( final Phylogeny target : t2 ) { ConfidenceAssessor.evaluate( "bootstrap", ev2, target, false, 1 ); } - // final Phylogeny t4 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,G)abcdefg", new NHXParser() )[ 0 ]; final Phylogeny[] ev4 = factory.create( "(((A,B),C),(X,Y));((F,G),((A,B,C),(D,E)))", new NHXParser() ); @@ -2485,27 +3770,31 @@ public final class Test { return false; } n.setName( "B0LM41_HUMAN" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ) + .equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { return false; } n.setName( "NP_001025424" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ) + .equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { return false; } n.setName( "_NM_001030253-" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ) + .equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { return false; } n.setName( "XM_002122186" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ) + .equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { return false; } n.setName( "dgh_AAA34956_gdg" ); if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } - n.setName( "j40f4_Q06891.1_fndn2 fnr3" ); - if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "Q06891.1" ) ) { + n.setName( "AAA34956" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } n.setName( "GI:394892" ); @@ -2523,6 +3812,16 @@ public final class Test { System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } + n.setName( "P12345" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + return false; + } + n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -2914,7 +4213,7 @@ public final class Test { if ( t4.getNumberOfExternalNodes() != 5 ) { return false; } - String s = w.toNewHampshire( t4, false, true ).toString(); + String s = w.toNewHampshire( t4, true ).toString(); if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } @@ -2935,7 +4234,7 @@ public final class Test { if ( !n.getName().equals( "D" ) ) { return false; } - s = w.toNewHampshire( t4, false, true ).toString(); + s = w.toNewHampshire( t4, true ).toString(); if ( !s.equals( "((A,B12),D);" ) ) { return false; } @@ -2944,7 +4243,7 @@ public final class Test { if ( t5.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t5, false, true ).toString(); + s = w.toNewHampshire( t5, true ).toString(); if ( !s.equals( "(((B11,B12),B2),(C,D));" ) ) { return false; } @@ -2953,7 +4252,7 @@ public final class Test { if ( t6.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t6, false, false ).toString(); + s = w.toNewHampshire( t6, false ).toString(); if ( !s.equals( "((A,(B12,B2)),(C,D));" ) ) { return false; } @@ -2962,7 +4261,7 @@ public final class Test { if ( t7.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t7, false, true ).toString(); + s = w.toNewHampshire( t7, true ).toString(); if ( !s.equals( "((A,(B11,B2)),(C,D));" ) ) { return false; } @@ -2971,7 +4270,7 @@ public final class Test { if ( t8.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t8, false, false ).toString(); + s = w.toNewHampshire( t8, false ).toString(); if ( !s.equals( "((A,(B11,B12)),(C,D));" ) ) { return false; } @@ -2980,7 +4279,7 @@ public final class Test { if ( t9.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t9, false, true ).toString(); + s = w.toNewHampshire( t9, true ).toString(); if ( !s.equals( "((A,((B11,B12),B2)),D);" ) ) { return false; } @@ -2989,7 +4288,7 @@ public final class Test { if ( t10.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t10, false, true ).toString(); + s = w.toNewHampshire( t10, true ).toString(); if ( !s.equals( "((A,((B11,B12),B2)),C);" ) ) { return false; } @@ -2998,7 +4297,7 @@ public final class Test { if ( t11.getNumberOfExternalNodes() != 2 ) { return false; } - s = w.toNewHampshire( t11, false, true ).toString(); + s = w.toNewHampshire( t11, true ).toString(); if ( !s.equals( "(B,C);" ) ) { return false; } @@ -3006,7 +4305,7 @@ public final class Test { if ( t11.getNumberOfExternalNodes() != 1 ) { return false; } - s = w.toNewHampshire( t11, false, false ).toString(); + s = w.toNewHampshire( t11, false ).toString(); if ( !s.equals( "B;" ) ) { return false; } @@ -3015,7 +4314,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 8 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),(B1,B3),(C1,C2,C3));" ) ) { return false; } @@ -3023,7 +4322,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 7 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),B1,(C1,C2,C3));" ) ) { return false; } @@ -3031,7 +4330,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 6 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A1,A2,A3),B1,(C1,C2));" ) ) { return false; } @@ -3039,7 +4338,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A2,A3),B1,(C1,C2));" ) ) { return false; } @@ -3047,7 +4346,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 4 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "((A2,A3),(C1,C2));" ) ) { return false; } @@ -3055,7 +4354,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 3 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "(A2,(C1,C2));" ) ) { return false; } @@ -3063,7 +4362,7 @@ public final class Test { if ( t12.getNumberOfExternalNodes() != 2 ) { return false; } - s = w.toNewHampshire( t12, false, true ).toString(); + s = w.toNewHampshire( t12, true ).toString(); if ( !s.equals( "(C1,C2);" ) ) { return false; } @@ -3072,7 +4371,7 @@ public final class Test { if ( t13.getNumberOfExternalNodes() != 4 ) { return false; } - s = w.toNewHampshire( t13, false, true ).toString(); + s = w.toNewHampshire( t13, true ).toString(); if ( !s.equals( "(A,B,C,E:5.0);" ) ) { return false; } @@ -3081,7 +4380,7 @@ public final class Test { if ( t14.getNumberOfExternalNodes() != 5 ) { return false; } - s = w.toNewHampshire( t14, false, true ).toString(); + s = w.toNewHampshire( t14, true ).toString(); if ( !s.equals( "((A,B,C,D:1.1),F);" ) ) { return false; } @@ -3311,64 +4610,197 @@ public final class Test { if ( !f.exists() ) { return false; } - if ( !f.isDirectory() ) { + if ( !f.isDirectory() ) { + return false; + } + if ( !f.canRead() ) { + return false; + } + } + catch ( final Exception e ) { + return false; + } + return true; + } + + private static boolean testEbiEntryRetrieval() { + try { + final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEntry( "AAK41263" ); + if ( !entry.getAccession().equals( "AAK41263" ) ) { + System.out.println( entry.getAccession() ); + return false; + } + if ( !entry.getTaxonomyScientificName().equals( "Sulfolobus solfataricus P2" ) ) { + System.out.println( entry.getTaxonomyScientificName() ); + return false; + } + if ( !entry.getSequenceName() + .equals( "Sulfolobus solfataricus P2 Glycogen debranching enzyme, hypothetical (treX-like)" ) ) { + System.out.println( entry.getSequenceName() ); + return false; + } + if ( !entry.getGeneName().equals( "treX-like" ) ) { + System.out.println( entry.getGeneName() ); + return false; + } + if ( !entry.getTaxonomyIdentifier().equals( "273057" ) ) { + System.out.println( entry.getTaxonomyIdentifier() ); + return false; + } + if ( !entry.getAnnotations().first().getRefValue().equals( "3.2.1.33" ) ) { + System.out.println( entry.getAnnotations().first().getRefValue() ); + return false; + } + if ( !entry.getAnnotations().first().getRefSource().equals( "EC" ) ) { + System.out.println( entry.getAnnotations().first().getRefSource() ); + return false; + } + if ( entry.getCrossReferences().size() < 1 ) { + return false; + } + final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" ); + if ( !entry1.getAccession().equals( "ABJ16409" ) ) { + return false; + } + if ( !entry1.getTaxonomyScientificName().equals( "Felis catus" ) ) { + System.out.println( entry1.getTaxonomyScientificName() ); + return false; + } + if ( !entry1.getSequenceName().equals( "Felis catus (domestic cat) partial BCL2" ) ) { + System.out.println( entry1.getSequenceName() ); + return false; + } + if ( !entry1.getTaxonomyIdentifier().equals( "9685" ) ) { + System.out.println( entry1.getTaxonomyIdentifier() ); + return false; + } + if ( !entry1.getGeneName().equals( "BCL2" ) ) { + System.out.println( entry1.getGeneName() ); + return false; + } + if ( entry1.getCrossReferences().size() < 1 ) { + return false; + } + final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" ); + if ( !entry2.getAccession().equals( "NM_184234" ) ) { + return false; + } + if ( !entry2.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry2.getTaxonomyScientificName() ); + return false; + } + if ( !entry2.getSequenceName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + System.out.println( entry2.getSequenceName() ); + return false; + } + if ( !entry2.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry2.getTaxonomyIdentifier() ); + return false; + } + if ( !entry2.getGeneName().equals( "RBM39" ) ) { + System.out.println( entry2.getGeneName() ); + return false; + } + if ( entry2.getCrossReferences().size() < 1 ) { + return false; + } + if ( !entry2.getChromosome().equals( "20" ) ) { + return false; + } + if ( !entry2.getMap().equals( "20q11.22" ) ) { + return false; + } + final SequenceDatabaseEntry entry3 = SequenceDbWsTools.obtainEntry( "HM043801" ); + if ( !entry3.getAccession().equals( "HM043801" ) ) { + return false; + } + if ( !entry3.getTaxonomyScientificName().equals( "Bursaphelenchus xylophilus" ) ) { + System.out.println( entry3.getTaxonomyScientificName() ); + return false; + } + if ( !entry3.getSequenceName().equals( "Bursaphelenchus xylophilus RAF gene, complete cds" ) ) { + System.out.println( entry3.getSequenceName() ); + return false; + } + if ( !entry3.getTaxonomyIdentifier().equals( "6326" ) ) { + System.out.println( entry3.getTaxonomyIdentifier() ); + return false; + } + if ( !entry3.getSequenceSymbol().equals( "RAF" ) ) { + System.out.println( entry3.getSequenceSymbol() ); + return false; + } + if ( !ForesterUtil.isEmpty( entry3.getGeneName() ) ) { + return false; + } + if ( entry3.getCrossReferences().size() < 1 ) { + return false; + } + final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" ); + if ( !entry4.getAccession().equals( "AAA36557" ) ) { + return false; + } + if ( !entry4.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry4.getTaxonomyScientificName() ); + return false; + } + if ( !entry4.getSequenceName().equals( "Homo sapiens (human) ras protein" ) ) { + System.out.println( entry4.getSequenceName() ); + return false; + } + if ( !entry4.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry4.getTaxonomyIdentifier() ); + return false; + } + if ( !entry4.getGeneName().equals( "ras" ) ) { + System.out.println( entry4.getGeneName() ); return false; } - if ( !f.canRead() ) { + final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "AAZ45343.1" ); + if ( !entry5.getAccession().equals( "AAZ45343" ) ) { + return false; + } + if ( !entry5.getTaxonomyScientificName().equals( "Dechloromonas aromatica RCB" ) ) { + System.out.println( entry5.getTaxonomyScientificName() ); + return false; + } + if ( !entry5.getSequenceName().equals( "Dechloromonas aromatica RCB 1,4-alpha-glucan branching enzyme" ) ) { + System.out.println( entry5.getSequenceName() ); + return false; + } + if ( !entry5.getTaxonomyIdentifier().equals( "159087" ) ) { + System.out.println( entry5.getTaxonomyIdentifier() ); + return false; + } + final SequenceDatabaseEntry entry6 = SequenceDbWsTools.obtainEntry( "M30539" ); + if ( !entry6.getAccession().equals( "M30539" ) ) { + return false; + } + if ( !entry6.getGeneName().equals( "ras" ) ) { + return false; + } + if ( !entry6.getSequenceName().equals( "Human SK2 c-Ha-ras-1 oncogene-encoded protein gene, exon 1" ) ) { + return false; + } + if ( !entry6.getTaxonomyIdentifier().equals( "9606" ) ) { + return false; + } + if ( !entry6.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + return false; + } + if ( entry6.getCrossReferences().size() < 1 ) { return false; } } - catch ( final Exception e ) { - return false; - } - return true; - } - - private static boolean testEmblEntryRetrieval() { - //The format for GenBank Accession numbers are: - //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals - //Protein: 3 letters + 5 numerals - //http://www.ncbi.nlm.nih.gov/Sequin/acc.html - if ( !SequenceIdParser.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( ".AY423861.2" ).equals( "AY423861.2" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "AAY423861" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "AY4238612" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "AAY4238612" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "Y423861" ) != null ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "|S123456" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "ABC123456" ) != null ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) { - return false; + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); + e.printStackTrace( System.out ); + return true; } - if ( SequenceIdParser.parseGenbankAccessor( "ABCD12345" ) != null ) { + catch ( final Exception e ) { + e.printStackTrace(); return false; } return true; @@ -3455,7 +4887,8 @@ public final class Test { for( final PhylogenyNodeIterator iter = t5.iteratorExternalForward(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); } - final Phylogeny t6 = factory.create( "((((((A))),(((B))),((C)),((((D)))),E)),((F)))", new NHXParser() )[ 0 ]; + final Phylogeny t6 = factory.create( "((((((A))),(((B))),((C)),((((D)))),E)),((F)))", + new NHXParser() )[ 0 ]; final PhylogenyNodeIterator iter = t6.iteratorExternalForward(); if ( !iter.next().getName().equals( "A" ) ) { return false; @@ -3491,21 +4924,274 @@ public final class Test { if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus" ).equals( "Mus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus" ) + if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCDO2" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus musculus BCDO2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_BCDO2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2 Mus musculus musculus" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Bcl Mus musculus musculus" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "vcl Mus musculus musculus" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_BCDO2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_Musculus" ) .equals( "Mus musculus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "BCDO2_Mus_musculus_musculus-12" ) + if ( ParserUtils + .extractScientificNameFromNodeName( "could_be_anything_Mus_musculus_musculus_musculus" ) != null ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "musculus" ) != null ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus" ) != null ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "mus_musculus_musculus" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_1" ) .equals( "Mus musculus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus-12" ).equals( "Mus musculus" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_1" ).equals( "Mus musculus" ) ) { + return false; + } + if ( ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_bcl" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_BCL" ).equals( "Mus musculus" ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus-12 affrre e" ) + if ( ParserUtils.extractScientificNameFromNodeName( "Mus musculus bcl" ) != null ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus BCL" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus xBCL" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus musculus x1" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( " -XS12_Mus_musculus_12" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12 affrre e" ) + .equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( " -1234_Mus_musculus_12_affrre_e" ) .equals( "Mus musculus" ) ) { return false; } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_2bcl2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus_bcl2" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Pilostyles mexicana Mexico Breedlove 27233" ) + .equals( "Pilostyles mexicana" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_strain_K12/DH10B" ) + .equals( "Escherichia coli strain K12/DH10B" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K12/DH10B" ) + .equals( "Escherichia coli str. K12/DH10B" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K12/DH10B" ) + .equals( "Escherichia coli str. K12/DH10B" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis_lyrata_subsp_lyrata" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata 395" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp. lyrata bcl2" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subsp lyrata bcl2" ) + .equals( "Arabidopsis lyrata subsp. lyrata" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Arabidopsis lyrata subspecies lyrata bcl2" ) + .equals( "Arabidopsis lyrata subspecies lyrata" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Verbascum sinuatum var. adenosepalum bcl2" ) + .equals( "Verbascum sinuatum var. adenosepalum" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (strain K12)" ) + .equals( "Escherichia coli (strain K12)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (strain K12) bcl2" ) + .equals( "Escherichia coli (strain K12)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12)" ) + .equals( "Escherichia coli (str. K12)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str K12)" ) + .equals( "Escherichia coli (str. K12)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (str. K12) bcl2" ) + .equals( "Escherichia coli (str. K12)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli (var K12) bcl2" ) + .equals( "Escherichia coli (var. K12)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str. K-12 substr. MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils + .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia coli str K-12 substr MG1655star gene1" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils + .extractScientificNameFromNodeName( "could be anything Escherichia coli str K-12 substr MG1655star GENE1" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Escherichia_coli_str_K-12_substr_MG1655star" ) + .equals( "Escherichia coli str. K-12 substr. MG1655star" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp." ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. 123" ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp. K12" ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "something Macrocera sp. K12" ) + .equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp." ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum ssp merenskyanum 07 48" ) + .equals( "Sesamum rigidum subsp. merenskyanum" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum ssp. merenskyanum" ) + .equals( "Sesamum rigidum subsp. merenskyanum" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum (ssp. merenskyanum)" ) + .equals( "Sesamum rigidum (subsp. merenskyanum)" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Sesamum rigidum (ssp merenskyanum)" ) + .equals( "Sesamum rigidum (subsp. merenskyanum)" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testExtractTaxonomyDataFromNodeName() { + try { + PhylogenyNode n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN~1-2" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN|" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN~12" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "HNRPR_HUMAN" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } + n = new PhylogenyNode( "HNRPR_HUMAN_X" ); + if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -3516,7 +5202,8 @@ public final class Test { private static boolean testExtractTaxonomyCodeFromNodeName() { try { - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) @@ -3531,13 +5218,16 @@ public final class Test { .equals( "ARATH" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ) + .equals( "RAT" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "RAT" ) ) { + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "RAT", TAXONOMY_EXTRACTION.AGGRESSIVE ) + .equals( "RAT" ) ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE ) @@ -3572,18 +5262,20 @@ public final class Test { .equals( "SOYBN" ) ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "xxxSOYBNxxx", TAXONOMY_EXTRACTION.AGGRESSIVE ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "xxxSOYBNxxx", + TAXONOMY_EXTRACTION.AGGRESSIVE ) != null ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "-SOYBN~", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "SOYBN" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "NNN8_ECOLI/1-2:0.01", - TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ).equals( "ECOLI" ) ) { + if ( !ParserUtils + .extractTaxonomyCodeFromNodeName( "NNN8_ECOLI/1-2:0.01", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) + .equals( "ECOLI" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blag_9YX45-blag", TAXONOMY_EXTRACTION.AGGRESSIVE ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blagg_9YX45-blag", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "9YX45" ) ) { return false; } @@ -3611,15 +5303,18 @@ public final class Test { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", - TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + .equals( "RAT" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445", - TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + .equals( "RAT" ) ) { return false; } if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT|function = 23445", - TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) { + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) + .equals( "RAT" ) ) { return false; } if ( ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RATfunction = 23445", @@ -3647,7 +5342,8 @@ public final class Test { .equals( "MOUSE" ) ) { return false; } - if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { + if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE ", + TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) { return false; } } @@ -3662,166 +5358,166 @@ public final class Test { try { PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr.B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr=B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr-B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr/B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr\\B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr_B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr|B3RJ64 " ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr|B3RJ64-" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr=B3RJ64-" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "_tr=B3RJ64_" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr_tr|B3RJ64_sp|123 " ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } - n.setName( "sp|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + n.setName( "B3RJ64" ); + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } - n.setName( "ssp|B3RJ64" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + n.setName( "sp|B3RJ64" ); + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ64C" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp B3RJ64" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ6X" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|B3RJ6" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "K1PYK7_CRAGI" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYK7_PEA" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PEA" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PEA" ) ) { return false; } n.setName( "K1PYK7_RAT" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_RAT" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_RAT" ) ) { return false; } n.setName( "K1PYK7_PIG" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "~K1PYK7_PIG~" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "123456_ECOLI-K1PYK7_CRAGI-sp" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYKX_CRAGI" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "XXXXX_CRAGI" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "XXXXX_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "XXXXX_CRAGI" ) ) { return false; } n.setName( "tr|H3IB65|H3IB65_STRPU~2-2" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "H3IB65" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "H3IB65" ) ) { return false; } n.setName( "jgi|Lacbi2|181470|Lacbi1.estExt_GeneWisePlus_human.C_10729~2-3" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|Q86U06|RBM23_HUMAN~2-2" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "Q86U06" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "Q86U06" ) ) { return false; } n = new PhylogenyNode(); org.forester.phylogeny.data.Sequence seq = new org.forester.phylogeny.data.Sequence(); seq.setSymbol( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setSymbol( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setName( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setName( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "K1PYK8_CRAGI", "?" ) ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK8_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK8_CRAGI" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "tr|B3RJ64", "?" ) ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } // n = new PhylogenyNode(); n.setName( "ACP19736" ); - if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) { + if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } n = new PhylogenyNode(); - n.setName( "_ACP19736_" ); - if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) { + n.setName( "|ACP19736|" ); + if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } } @@ -3834,12 +5530,22 @@ public final class Test { private static boolean testFastaParser() { try { - if ( !FastaParser.isLikelyFasta( new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ) ) ) { + final FileInputStream fis1 = new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ); + if ( !FastaParser.isLikelyFasta( fis1 ) ) { + fis1.close(); return false; } - if ( FastaParser.isLikelyFasta( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) ) ) { + else { + fis1.close(); + } + final FileInputStream fis2 = new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ); + if ( FastaParser.isLikelyFasta( fis2 ) ) { + fis2.close(); return false; } + else { + fis2.close(); + } final Msa msa_0 = FastaParser.parseMsa( new FileInputStream( PATH_TO_TEST_DATA + "fasta_0.fasta" ) ); if ( !msa_0.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "ACGTGKXFMFDMXEXXXSFMFMF" ) ) { return false; @@ -3850,7 +5556,7 @@ public final class Test { if ( !msa_0.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "DKXASDFXSFXFKFKSXDFKSLX" ) ) { return false; } - if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPRXWXERR" ) ) { + if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPROWXERR" ) ) { return false; } if ( !msa_0.getSequenceAsString( 3 ).toString().equalsIgnoreCase( "AAAAAAAAAAAAAAAAAAAAAAA" ) ) { @@ -3860,8 +5566,59 @@ public final class Test { return false; } } - catch ( final Exception e ) { - e.printStackTrace(); + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testGenbankAccessorParsing() { + //The format for GenBank Accession numbers are: + //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals + //Protein: 3 letters + 5 numerals + //http://www.ncbi.nlm.nih.gov/Sequin/acc.html + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( "AY423861" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ) + .equals( "AY423861.24" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) { return false; } return true; @@ -4308,9 +6065,9 @@ public final class Test { if ( !rt.getName().equals( "r" ) ) { return false; } - final Phylogeny p3 = factory - .create( "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)", - new NHXParser() )[ 0 ]; + final Phylogeny p3 = factory.create( + "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)", + new NHXParser() )[ 0 ]; final PhylogenyNode bc_3 = PhylogenyMethods.calculateLCA( p3.getNode( "b" ), p3.getNode( "c" ) ); if ( !bc_3.getName().equals( "bc" ) ) { return false; @@ -4390,14 +6147,15 @@ public final class Test { private static boolean testGetLCA2() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; + // final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; + final Phylogeny p_a = NHXParser.parse( "(a)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_a ); final PhylogenyNode p_a_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_a.getNode( "a" ), p_a.getNode( "a" ) ); if ( !p_a_1.getName().equals( "a" ) ) { return false; } - final Phylogeny p_b = factory.create( "((a)b)", new NHXParser() )[ 0 ]; + final Phylogeny p_b = NHXParser.parse( "((a)b)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_b ); final PhylogenyNode p_b_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_b.getNode( "b" ), p_b.getNode( "a" ) ); @@ -4593,9 +6351,9 @@ public final class Test { if ( !rt.getName().equals( "r" ) ) { return false; } - final Phylogeny p3 = factory - .create( "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)", - new NHXParser() )[ 0 ]; + final Phylogeny p3 = factory.create( + "((((a,(b,c)bc)abc,(d,e)de)abcde,f)abcdef,(((g,h)gh,(i,j)ij)ghij,k)ghijk,l)", + new NHXParser() )[ 0 ]; PhylogenyMethods.preOrderReId( p3 ); final PhylogenyNode bc_3 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p3.getNode( "b" ), p3.getNode( "c" ) ); @@ -4719,10 +6477,14 @@ public final class Test { final String test_dir = Test.PATH_TO_TEST_DATA; try { final HmmscanPerDomainTableParser parser1 = new HmmscanPerDomainTableParser( new File( test_dir - + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_1" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE ); + + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_1" ), + "MONBR", + INDIVIDUAL_SCORE_CUTOFF.NONE ); parser1.parse(); final HmmscanPerDomainTableParser parser2 = new HmmscanPerDomainTableParser( new File( test_dir - + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_2" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE ); + + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_2" ), + "MONBR", + INDIVIDUAL_SCORE_CUTOFF.NONE ); final List proteins = parser2.parse(); if ( parser2.getProteinsEncountered() != 4 ) { return false; @@ -4736,7 +6498,10 @@ public final class Test { if ( parser2.getDomainsIgnoredDueToDuf() != 0 ) { return false; } - if ( parser2.getDomainsIgnoredDueToEval() != 0 ) { + if ( parser2.getDomainsIgnoredDueToFsEval() != 0 ) { + return false; + } + if ( parser2.getDomainsIgnoredDueToIEval() != 0 ) { return false; } final Protein p1 = proteins.get( 0 ); @@ -4776,12 +6541,6 @@ public final class Test { if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerDomainScore(), 135.7 ) ) { return false; } - if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerSequenceEvalue(), 8.3e-40 ) ) { - return false; - } - if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerSequenceScore(), 136.3 ) ) { - return false; - } if ( !Test.isEqual( p4.getProteinDomain( 0 ).getNumber(), 1 ) ) { return false; } @@ -5090,11 +6849,11 @@ public final class Test { private static boolean testMsaQualityMethod() { try { - final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJ" ); - final Sequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJ" ); - final Sequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJ" ); - final Sequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ" ); - final List l = new ArrayList(); + final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJJE-" ); + final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJJBB" ); + final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJJ--" ); + final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ---" ); + final List l = new ArrayList(); l.add( s0 ); l.add( s1 ); l.add( s2 ); @@ -5112,6 +6871,238 @@ public final class Test { if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 3 ) ) ) { return false; } + if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 10 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 11 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 12 ) ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testMsaEntropy() { + try { + final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAAAAA" ); + final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "AAAIACC" ); + final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AAIIIIF" ); + final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AIIIVVW" ); + final List l = new ArrayList(); + l.add( s0 ); + l.add( s1 ); + l.add( s2 ); + l.add( s3 ); + final Msa msa = BasicMsa.createInstance( l ); + //TODO need to DO the tests!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + //FIXME + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 0 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 1 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 2 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 3 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 4 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 5 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa, 6 ) ); + // System.out.println(); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 0 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 1 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 2 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 3 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 4 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 5 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 6 ) ); + final List l2 = new ArrayList(); + l2.add( BasicSequence.createAaSequence( "1", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "2", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "3", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "4", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "5", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "6", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "7", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "8", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "9", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "10", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "11", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "12", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "13", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "14", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "15", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "16", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "17", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "18", "AIIIVVW" ) ); + l2.add( BasicSequence.createAaSequence( "19", "AAAAAAA" ) ); + l2.add( BasicSequence.createAaSequence( "20", "AAAIACC" ) ); + l2.add( BasicSequence.createAaSequence( "21", "AAIIIIF" ) ); + l2.add( BasicSequence.createAaSequence( "22", "AIIIVVW" ) ); + final Msa msa2 = BasicMsa.createInstance( l2 ); + // System.out.println(); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 0 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 1 ) ); + // System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 20, msa2, 2 ) ); + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testDeleteableMsa() { + try { + final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAA" ); + final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "BAAA" ); + final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "CAAA" ); + final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "DAAA" ); + final MolecularSequence s4 = BasicSequence.createAaSequence( "e", "EAAA" ); + final MolecularSequence s5 = BasicSequence.createAaSequence( "f", "FAAA" ); + final List l0 = new ArrayList(); + l0.add( s0 ); + l0.add( s1 ); + l0.add( s2 ); + l0.add( s3 ); + l0.add( s4 ); + l0.add( s5 ); + final DeleteableMsa dmsa0 = DeleteableMsa.createInstance( l0 ); + dmsa0.deleteRow( "b", false ); + if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) { + return false; + } + dmsa0.deleteRow( "e", false ); + dmsa0.deleteRow( "a", false ); + dmsa0.deleteRow( "f", false ); + if ( dmsa0.getLength() != 4 ) { + return false; + } + if ( dmsa0.getNumberOfSequences() != 2 ) { + return false; + } + if ( !dmsa0.getIdentifier( 0 ).equals( "c" ) ) { + return false; + } + if ( !dmsa0.getIdentifier( 1 ).equals( "d" ) ) { + return false; + } + if ( dmsa0.getResidueAt( 0, 0 ) != 'C' ) { + return false; + } + if ( !dmsa0.getSequenceAsString( 0 ).toString().equals( "CAAA" ) ) { + return false; + } + if ( dmsa0.getColumnAt( 0 ).size() != 2 ) { + return false; + } + dmsa0.deleteRow( "c", false ); + dmsa0.deleteRow( "d", false ); + if ( dmsa0.getNumberOfSequences() != 0 ) { + return false; + } + // + final MolecularSequence s_0 = BasicSequence.createAaSequence( "a", "--A---B-C--X----" ); + final MolecularSequence s_1 = BasicSequence.createAaSequence( "b", "--B-----C-------" ); + final MolecularSequence s_2 = BasicSequence.createAaSequence( "c", "--C--AB-C------Z" ); + final MolecularSequence s_3 = BasicSequence.createAaSequence( "d", "--D--AA-C-------" ); + final MolecularSequence s_4 = BasicSequence.createAaSequence( "e", "--E--AA-C-------" ); + final MolecularSequence s_5 = BasicSequence.createAaSequence( "f", "--F--AB-CD--Y---" ); + final List l1 = new ArrayList(); + l1.add( s_0 ); + l1.add( s_1 ); + l1.add( s_2 ); + l1.add( s_3 ); + l1.add( s_4 ); + l1.add( s_5 ); + final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 ); + dmsa1.deleteGapOnlyColumns(); + dmsa1.deleteRow( "a", false ); + dmsa1.deleteRow( "f", false ); + dmsa1.deleteRow( "d", false ); + dmsa1.deleteGapOnlyColumns(); + if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) { + return false; + } + if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "CABCZ" ) ) { + return false; + } + if ( !dmsa1.getSequenceAsString( 2 ).toString().equals( "EAAC-" ) ) { + return false; + } + dmsa1.deleteRow( "c", false ); + dmsa1.deleteGapOnlyColumns(); + final Writer w0 = new StringWriter(); + dmsa1.write( w0, MSA_FORMAT.FASTA ); + final Writer w1 = new StringWriter(); + dmsa1.write( w1, MSA_FORMAT.PHYLIP ); + if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C" ) ) { + return false; + } + if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "EAAC" ) ) { + return false; + } + final MolecularSequence s__0 = BasicSequence.createAaSequence( "a", "A------" ); + final MolecularSequence s__1 = BasicSequence.createAaSequence( "b", "BB-----" ); + final MolecularSequence s__2 = BasicSequence.createAaSequence( "c", "CCC----" ); + final MolecularSequence s__3 = BasicSequence.createAaSequence( "d", "DDDD---" ); + final MolecularSequence s__4 = BasicSequence.createAaSequence( "e", "EEEEE--" ); + final MolecularSequence s__5 = BasicSequence.createAaSequence( "f", "FFFFFF-" ); + final List l2 = new ArrayList(); + l2.add( s__0 ); + l2.add( s__1 ); + l2.add( s__2 ); + l2.add( s__3 ); + l2.add( s__4 ); + l2.add( s__5 ); + final DeleteableMsa dmsa2 = DeleteableMsa.createInstance( l2 ); + dmsa2.deleteGapColumns( 0.5 ); + if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A---" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB--" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CCC-" ) ) { + return false; + } + dmsa2.deleteGapColumns( 0.2 ); + if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A-" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CC" ) ) { + return false; + } + dmsa2.deleteGapColumns( 0 ); + dmsa2.deleteRow( "a", false ); + dmsa2.deleteRow( "b", false ); + dmsa2.deleteRow( "f", false ); + dmsa2.deleteRow( "e", false ); + dmsa2.setIdentifier( 0, "new_c" ); + dmsa2.setIdentifier( 1, "new_d" ); + dmsa2.setResidueAt( 0, 0, 'x' ); + final MolecularSequence s = dmsa2.deleteRow( "new_d", true ); + if ( !s.getMolecularSequenceAsString().equals( "D" ) ) { + return false; + } + final Writer w = new StringWriter(); + dmsa2.write( w, MSA_FORMAT.PHYLIP ); + final String phylip = w.toString(); + if ( !phylip.equals( "1 1" + ForesterUtil.LINE_SEPARATOR + "new_c x" + ForesterUtil.LINE_SEPARATOR ) ) { + System.out.println( phylip ); + return false; + } + final Writer w2 = new StringWriter(); + dmsa2.write( w2, MSA_FORMAT.FASTA ); + final String fasta = w2.toString(); + if ( !fasta.equals( ">new_c" + ForesterUtil.LINE_SEPARATOR + "x" + ForesterUtil.LINE_SEPARATOR ) ) { + System.out.println( fasta ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -5126,7 +7117,7 @@ public final class Test { PhylogenyNode n; List ext = new ArrayList(); final StringBuffer sb0 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t0 = factory.create( sb0, new NHXParser() )[ 0 ]; + final Phylogeny t0 = factory.create( sb0.toString(), new NHXParser() )[ 0 ]; t0.getNode( "cd" ).setCollapse( true ); t0.getNode( "cde" ).setCollapse( true ); n = t0.getFirstExternalNode(); @@ -5154,7 +7145,7 @@ public final class Test { } ext.clear(); final StringBuffer sb1 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t1 = factory.create( sb1, new NHXParser() )[ 0 ]; + final Phylogeny t1 = factory.create( sb1.toString(), new NHXParser() )[ 0 ]; t1.getNode( "ab" ).setCollapse( true ); t1.getNode( "cd" ).setCollapse( true ); t1.getNode( "cde" ).setCollapse( true ); @@ -5179,11 +7170,9 @@ public final class Test { if ( !ext.get( 4 ).getName().equals( "h" ) ) { return false; } - // - // ext.clear(); final StringBuffer sb2 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t2 = factory.create( sb2, new NHXParser() )[ 0 ]; + final Phylogeny t2 = factory.create( sb2.toString(), new NHXParser() )[ 0 ]; t2.getNode( "ab" ).setCollapse( true ); t2.getNode( "cd" ).setCollapse( true ); t2.getNode( "cde" ).setCollapse( true ); @@ -5209,11 +7198,9 @@ public final class Test { if ( !ext.get( 3 ).getName().equals( "gh" ) ) { return false; } - // - // ext.clear(); final StringBuffer sb3 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t3 = factory.create( sb3, new NHXParser() )[ 0 ]; + final Phylogeny t3 = factory.create( sb3.toString(), new NHXParser() )[ 0 ]; t3.getNode( "ab" ).setCollapse( true ); t3.getNode( "cd" ).setCollapse( true ); t3.getNode( "cde" ).setCollapse( true ); @@ -5237,11 +7224,9 @@ public final class Test { if ( !ext.get( 2 ).getName().equals( "fgh" ) ) { return false; } - // - // ext.clear(); final StringBuffer sb4 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t4 = factory.create( sb4, new NHXParser() )[ 0 ]; + final Phylogeny t4 = factory.create( sb4.toString(), new NHXParser() )[ 0 ]; t4.getNode( "ab" ).setCollapse( true ); t4.getNode( "cd" ).setCollapse( true ); t4.getNode( "cde" ).setCollapse( true ); @@ -5255,10 +7240,8 @@ public final class Test { if ( n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes() != null ) { return false; } - // - // final StringBuffer sb5 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ]; + final Phylogeny t5 = factory.create( sb5.toString(), new NHXParser() )[ 0 ]; ext.clear(); n = t5.getFirstExternalNode(); while ( n != null ) { @@ -5292,10 +7275,8 @@ public final class Test { if ( !ext.get( 7 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb6 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ]; + final Phylogeny t6 = factory.create( sb6.toString(), new NHXParser() )[ 0 ]; ext.clear(); t6.getNode( "ab" ).setCollapse( true ); n = t6.getNode( "ab" ); @@ -5327,10 +7308,8 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb7 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ]; + final Phylogeny t7 = factory.create( sb7.toString(), new NHXParser() )[ 0 ]; ext.clear(); t7.getNode( "cd" ).setCollapse( true ); n = t7.getNode( "a" ); @@ -5362,10 +7341,8 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb8 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" ); - final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ]; + final Phylogeny t8 = factory.create( sb8.toString(), new NHXParser() )[ 0 ]; ext.clear(); t8.getNode( "cd" ).setCollapse( true ); t8.getNode( "c" ).setCollapse( true ); @@ -5400,10 +7377,8 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "h" ) ) { return false; } - // - // final StringBuffer sb9 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t9 = factory.create( sb9, new NHXParser() )[ 0 ]; + final Phylogeny t9 = factory.create( sb9.toString(), new NHXParser() )[ 0 ]; ext.clear(); t9.getNode( "gh" ).setCollapse( true ); n = t9.getNode( "a" ); @@ -5435,10 +7410,8 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "gh" ) ) { return false; } - // - // final StringBuffer sb10 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t10 = factory.create( sb10, new NHXParser() )[ 0 ]; + final Phylogeny t10 = factory.create( sb10.toString(), new NHXParser() )[ 0 ]; ext.clear(); t10.getNode( "gh" ).setCollapse( true ); t10.getNode( "g" ).setCollapse( true ); @@ -5472,10 +7445,8 @@ public final class Test { if ( !ext.get( 6 ).getName().equals( "gh" ) ) { return false; } - // - // final StringBuffer sb11 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t11 = factory.create( sb11, new NHXParser() )[ 0 ]; + final Phylogeny t11 = factory.create( sb11.toString(), new NHXParser() )[ 0 ]; ext.clear(); t11.getNode( "gh" ).setCollapse( true ); t11.getNode( "fgh" ).setCollapse( true ); @@ -5505,10 +7476,8 @@ public final class Test { if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb12 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t12 = factory.create( sb12, new NHXParser() )[ 0 ]; + final Phylogeny t12 = factory.create( sb12.toString(), new NHXParser() )[ 0 ]; ext.clear(); t12.getNode( "gh" ).setCollapse( true ); t12.getNode( "fgh" ).setCollapse( true ); @@ -5541,10 +7510,8 @@ public final class Test { if ( !ext.get( 5 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb13 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" ); - final Phylogeny t13 = factory.create( sb13, new NHXParser() )[ 0 ]; + final Phylogeny t13 = factory.create( sb13.toString(), new NHXParser() )[ 0 ]; ext.clear(); t13.getNode( "ab" ).setCollapse( true ); t13.getNode( "b" ).setCollapse( true ); @@ -5573,10 +7540,8 @@ public final class Test { if ( !ext.get( 4 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb14 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); - final Phylogeny t14 = factory.create( sb14, new NHXParser() )[ 0 ]; + final Phylogeny t14 = factory.create( sb14.toString(), new NHXParser() )[ 0 ]; ext.clear(); t14.getNode( "ab" ).setCollapse( true ); t14.getNode( "a" ).setCollapse( true ); @@ -5605,10 +7570,8 @@ public final class Test { if ( !ext.get( 4 ).getName().equals( "fgh" ) ) { return false; } - // - // final StringBuffer sb15 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); - final Phylogeny t15 = factory.create( sb15, new NHXParser() )[ 0 ]; + final Phylogeny t15 = factory.create( sb15.toString(), new NHXParser() )[ 0 ]; ext.clear(); t15.getNode( "ab" ).setCollapse( true ); t15.getNode( "a" ).setCollapse( true ); @@ -5643,7 +7606,7 @@ public final class Test { // // final StringBuffer sb16 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" ); - final Phylogeny t16 = factory.create( sb16, new NHXParser() )[ 0 ]; + final Phylogeny t16 = factory.create( sb16.toString(), new NHXParser() )[ 0 ]; ext.clear(); t16.getNode( "ab" ).setCollapse( true ); t16.getNode( "a" ).setCollapse( true ); @@ -6002,6 +7965,35 @@ public final class Test { if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) { return false; } + final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser(); + phylogenies = null; + phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S15613.nex", p2 ); + if ( phylogenies.length != 9 ) { + return false; + } + if ( !isEqual( 0.48039661496919533, + phylogenies[ 0 ].getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { + return false; + } + if ( !isEqual( 0.3959796191512233, + phylogenies[ 0 ].getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { + return false; + } + if ( !phylogenies[ 0 ].getName().equals( "Family Diadocidiidae MLT (Imported_tree_0)" ) ) { + return false; + } + if ( !phylogenies[ 1 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) { + return false; + } + if ( !phylogenies[ 2 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) { + return false; + } + if ( !isEqual( 0.065284, phylogenies[ 7 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { + return false; + } + if ( !isEqual( 0.065284, phylogenies[ 8 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -6034,7 +8026,6 @@ public final class Test { if ( phy != null ) { return false; } - // p.reset(); if ( !p.hasNext() ) { return false; @@ -6056,7 +8047,6 @@ public final class Test { if ( phy != null ) { return false; } - //// p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_2.nex" ); if ( !p.hasNext() ) { return false; @@ -6078,7 +8068,6 @@ public final class Test { if ( phy != null ) { return false; } - // p.reset(); if ( !p.hasNext() ) { return false; @@ -6100,7 +8089,6 @@ public final class Test { if ( phy != null ) { return false; } - //// p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_3.nex" ); if ( !p.hasNext() ) { return false; @@ -6147,15 +8135,12 @@ public final class Test { if ( phy != null ) { return false; } - //// + // p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_4_1.nex" ); - // if ( phylogenies.length != 18 ) { - // return false; - // } - //0 if ( !p.hasNext() ) { return false; } + //0 phy = p.next(); if ( phy == null ) { return false; @@ -6189,6 +8174,7 @@ public final class Test { return false; } if ( phy.getNumberOfExternalNodes() != 3 ) { + System.out.println( phy.toString() ); return false; } if ( !phy.getName().equals( "" ) ) { @@ -6385,8 +8371,7 @@ public final class Test { System.out.println( phy.getNumberOfExternalNodes() ); return false; } - if ( !phy - .toNewHampshire() + if ( !phy.toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; @@ -6406,8 +8391,7 @@ public final class Test { System.out.println( phy.getNumberOfExternalNodes() ); return false; } - if ( !phy - .toNewHampshire() + if ( !phy.toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; @@ -6427,8 +8411,7 @@ public final class Test { System.out.println( phy.getNumberOfExternalNodes() ); return false; } - if ( !phy - .toNewHampshire() + if ( !phy.toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; @@ -6448,8 +8431,7 @@ public final class Test { System.out.println( phy.getNumberOfExternalNodes() ); return false; } - if ( !phy - .toNewHampshire() + if ( !phy.toNewHampshire() .equals( "(1:0.212481,8:0.297838,(9:0.222729,((6:0.201563,7:0.194547):0.282035,(4:1.146091,(3:1.008881,(10:0.384105,(2:0.235682,5:0.353432):0.32368):0.103875):0.41354):0.254687):0.095341):0.079254):0.0;" ) ) { System.out.println( phy.toNewHampshire() ); return false; @@ -6566,6 +8548,82 @@ public final class Test { if ( phy.isRooted() ) { return false; } + // + final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser(); + p2.setSource( Test.PATH_TO_TEST_DATA + "S15613.nex" ); + // 0 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { + return false; + } + if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { + return false; + } + // 1 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 2 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 3 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 4 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 5 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 6 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 7 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + // 8 + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( !isEqual( 0.065284, phy.getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) { + return false; + } + if ( p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( phy != null ) { + return false; + } + // 0 + p2.reset(); + if ( !p2.hasNext() ) { + return false; + } + phy = p2.next(); + if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) { + return false; + } + if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -6722,6 +8780,14 @@ public final class Test { .equals( "Aranaeus" ) ) { return false; } + phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S14117.nex", parser ); + if ( phylogenies.length != 3 ) { + return false; + } + if ( !isEqual( phylogenies[ 2 ].getNode( "Aloysia lycioides 251-76-02169" ).getDistanceToParent(), + 0.00100049 ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -6741,25 +8807,25 @@ public final class Test { nhxp.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); nhxp.setReplaceUnderscores( true ); final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ]; - if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) { + if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A" ) ) { return false; } - if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( " B B" ) ) { + if ( !uc0.getRoot().getChildNode( 1 ).getName().equals( "B B" ) ) { return false; } - final Phylogeny p1b = factory - .create( " \n \t \b \r \f ; ( \n \t \b \r \f; A ; \n \t \b \r \f, \n \t \b \r \f; B ; \n \t \b \r \f 1 \n \t \b \r \f ; \n \t \b \r \f );;;;; \n \t \b \r \f;;; \n \t \b \r \f ", - new NHXParser() )[ 0 ]; + final Phylogeny p1b = factory.create( + " \n \t \b \r \f ; ( \n \t \b \r \f; A ; \n \t \b \r \f, \n \t \b \r \f; B ; \n \t \b \r \f 1 \n \t \b \r \f ; \n \t \b \r \f );;;;; \n \t \b \r \f;;; \n \t \b \r \f ", + new NHXParser() )[ 0 ]; if ( !p1b.toNewHampshireX().equals( "(';A;',';B;1;')" ) ) { return false; } if ( !p1b.toNewHampshire().equals( "(';A;',';B;1;');" ) ) { return false; } - final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ), new NHXParser() )[ 0 ]; + final Phylogeny p2 = factory.create( new StringBuffer( "(A,B2)" ).toString(), new NHXParser() )[ 0 ]; final Phylogeny p3 = factory.create( new char[] { '(', 'A', ',', 'B', '3', ')' }, new NHXParser() )[ 0 ]; final Phylogeny p4 = factory.create( "(A,B4);", new NHXParser() )[ 0 ]; - final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ), new NHXParser() )[ 0 ]; + final Phylogeny p5 = factory.create( new StringBuffer( "(A,B5);" ).toString(), new NHXParser() )[ 0 ]; final Phylogeny[] p7 = factory.create( "(A,B7);(C,D7)", new NHXParser() ); final Phylogeny[] p8 = factory.create( "(A,B8) (C,D8)", new NHXParser() ); final Phylogeny[] p9 = factory.create( "(A,B9)\n(C,D9)", new NHXParser() ); @@ -6767,8 +8833,7 @@ public final class Test { final Phylogeny[] p11 = factory.create( "(A,B11);(C,D11) (E,F11)\t(G,H11)", new NHXParser() ); final Phylogeny[] p12 = factory.create( "(A,B12) (C,D12) (E,F12) (G,H12)", new NHXParser() ); final Phylogeny[] p13 = factory.create( " ; (;A; , ; B ; 1 3 ; \n)\t ( \n ;" - + " C ; ,; D;13;);;;;;;(;E;,;F;13 ;) ; " - + "; ; ( \t\n\r\b; G ;, ;H ;1 3; ) ; ; ;", + + " C ; ,; D;13;);;;;;;(;E;,;F;13 ;) ; " + "; ; ( \t\n\r\b; G ;, ;H ;1 3; ) ; ; ;", new NHXParser() ); if ( !p13[ 0 ].toNewHampshireX().equals( "(';A;',';B;13;')" ) ) { return false; @@ -7017,56 +9082,230 @@ public final class Test { if ( p46.length != 0 ) { return false; } - final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p47 = factory.create( new StringBuffer( "((A,B)ab:2[0.44],C)" ).toString(), + new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p47.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } - final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p48 = factory.create( new StringBuffer( "((A,B)ab:2[88],C)" ).toString(), + new NHXParser() )[ 0 ]; if ( !isEqual( 88, p48.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } final Phylogeny p49 = factory - .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ), + .create( new StringBuffer( "((A,B)a[comment:a,b;(a)]b:2[0.44][comment(a,b,b);],C)" ).toString(), new NHXParser() )[ 0 ]; if ( !isEqual( 0.44, p49.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { return false; } - final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p50 = factory.create( new StringBuffer( "((\"A\",B)ab:2[88],C)" ).toString(), + new NHXParser() )[ 0 ]; if ( p50.getNode( "A" ) == null ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) .equals( "((A,B)ab:2.0[88],C);" ) ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE ).equals( "((A,B)ab:2.0,C);" ) ) { return false; } - if ( !p50.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) + if ( !p50.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) .equals( "((A,B)88:2.0,C);" ) ) { return false; } - final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p51 = factory.create( new StringBuffer( "((\"A(A\",B)ab:2[88],C)" ).toString(), + new NHXParser() )[ 0 ]; if ( p51.getNode( "A(A" ) == null ) { return false; } - final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p52 = factory.create( new StringBuffer( "(('A(A',B)ab:2[88],C)" ).toString(), + new NHXParser() )[ 0 ]; if ( p52.getNode( "A(A" ) == null ) { return false; } final Phylogeny p53 = factory - .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ), + .create( new StringBuffer( "(('A(A',\"B (x (a' ,b) f(x);\"[com])[ment]ab:2[88],C)" ).toString(), new NHXParser() )[ 0 ]; if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } - // - final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; + final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ).toString(), + new NHXParser() )[ 0 ]; if ( p54.getNode( "A" ) == null ) { return false; } - if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) - .equals( "((A,B)[88],C);" ) ) { + if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + .equals( "((A,B)[88],C);" ) ) { + return false; + } + final Phylogeny p55 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ) + .toString(), new NHXParser() )[ 0 ]; + if ( !p55.toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,lcl|HPV66_L1.1x:0.0798012);" ) ) { + System.out.println( p55.toNewHampshire() ); + return false; + } + final Phylogeny p56 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ) + .toString(), new NHXParser() )[ 0 ]; + if ( !p56.toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { + System.out.println( p56.toNewHampshire() ); + return false; + } + final Phylogeny p57 = factory + .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114\n237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1:x\":0.0798012);" ) + .toString(), new NHXParser() )[ 0 ]; + if ( !p57.toNewHampshire() + .equals( "(('lcl|HPV32_L1.:1 s':0.195593,'lcl|HPV30_L1.1|;a':0.114237):0.0359322,'lcl|HPV56_L1.1|,d':0.0727412,'lcl|HPV66_L1.1:x':0.0798012);" ) ) { + System.out.println( p56.toNewHampshire() ); + return false; + } + final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';"; + final Phylogeny p58 = factory.create( s58, new NHXParser() )[ 0 ]; + if ( !p58.toNewHampshire().equals( s58 ) ) { + System.out.println( p58.toNewHampshire() ); + return false; + } + final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";"; + final Phylogeny p59 = factory.create( s59, new NHXParser() )[ 0 ]; + if ( !p59.toNewHampshire().equals( s59 ) ) { + System.out.println( p59.toNewHampshire() ); + return false; + } + final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');"; + final Phylogeny p60 = factory.create( s60, new NHXParser() )[ 0 ]; + if ( !p60.toNewHampshire().equals( s60 ) ) { + System.out.println( p60.toNewHampshire() ); + return false; + } + final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';"; + final Phylogeny p61 = factory.create( s61, new NHXParser() )[ 0 ]; + if ( !p61.toNewHampshire() + .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) { + System.out.println( p61.toNewHampshire() ); + return false; + } + final String s62 = "(1[&type=\"X\",size=123,subtree=(1,2);]:0.003,2[&type=\"(X,Y:3)\"]:0.004)[&type=\"(X,Y)\"]:0.0;"; + final Phylogeny p62 = factory.create( s62, new NHXParser() )[ 0 ]; + if ( !p62.toNewHampshire().equals( "(1:0.003,2:0.004):0.0;" ) ) { + System.out.println( p62.toNewHampshire() ); + return false; + } + final String s63 = "(1:0.003[&type=\"X\",size=123,subtree=(1,2);],2:0.004[&type=\"(X,Y:3)\"]):0.0[&type=\"(X,Y)\"];"; + final Phylogeny p63 = factory.create( s63, new NHXParser() )[ 0 ]; + if ( !p63.toNewHampshire().equals( "(1:0.003,2:0.004):0.0;" ) ) { + System.out.println( p63.toNewHampshire() ); + return false; + } + final String s64 = "((1,2):[95.5],3);"; + final Phylogeny p64 = factory.create( s64, new NHXParser() )[ 0 ]; + if ( !p64.toNewHampshireX().equals( "((1,2)[&&NHX:B=95.5],3)" ) ) { + System.out.println( p64.toNewHampshireX() ); + return false; + } + final String s65 = "((1:0.1,2:0.2):0.3[10.2],3);"; + final Phylogeny p65 = factory.create( s65, new NHXParser() )[ 0 ]; + if ( !p65.toNewHampshireX().equals( "((1:0.1,2:0.2):0.3[&&NHX:B=10.2],3)" ) ) { + System.out.println( p65.toNewHampshireX() ); + return false; + } + final Phylogeny p66 = factory.create( "((A,B)ab:2[0.44],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 0.44, p66.getNode( "ab" ).getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p67 = factory.create( "((A,B):2[0.67],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 0.67, p67.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p68 = factory.create( "((A,B):[0.68],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 0.68, p68.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p69 = factory.create( "((A,B)[0.69],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 0.69, p69.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p70 = factory.create( "((A,B)[+0.7],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 0.7, p70.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p71 = factory.create( "((A,B)[-0.71],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( -0.71, p71.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p72 = factory.create( "((A,B)[],C)", new NHXParser() )[ 0 ]; + if ( !p72.toNewHampshireX().equals( "((A,B),C)" ) ) { + return false; + } + final Phylogeny p73 = factory.create( "((A,B)[12x],C)", new NHXParser() )[ 0 ]; + if ( !p73.toNewHampshireX().equals( "((A,B),C)" ) ) { + return false; + } + final Phylogeny p74 = factory.create( "((A,B)[12+],C)", new NHXParser() )[ 0 ]; + if ( !p74.toNewHampshireX().equals( "((A,B),C)" ) ) { + return false; + } + final Phylogeny p75 = factory.create( "((A,B)ab[222]:3,C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 222, p75.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p76 = factory.create( "((A,B)[100]:12,C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 100, p76.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p77 = factory.create( "((A,B)abcde:13[77],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 77, p77.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final Phylogeny p78 = factory.create( "((A,B):14[0],C)", new NHXParser() )[ 0 ]; + if ( !isEqual( 0, p78.getNode( "A" ).getParent().getBranchData().getConfidence( 0 ).getValue() ) ) { + return false; + } + final String the_one = "((((((a,b)ab:3.0[2],c):12.0[100],(d,e)de)abcde:13.0[2],f):14.0[0]):0.0[0]):0.0[0];"; + final Phylogeny p79 = factory.create( + "((((((a,b)ab[2]:3,c)[100]:12,(d,e)de)abcde:13[2],f):14[0]):0[0])[0]:0;", + new NHXParser() )[ 0 ]; + final String str79 = p79.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ); + if ( !str79.equals( the_one ) ) { + System.out.println( str79 ); + return false; + } + final Phylogeny p80 = factory.create( + "((((((a[a)],b[12])ab[2]:3,c)[+100]:12,(d,e)de[12d,)])ab[]c[]de:13[2],f):14[0]):0[0])[0]:0;", + new NHXParser() )[ 0 ]; + final String str80 = p80.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ); + if ( !str80.equals( the_one ) ) { + System.out.println( str80 ); + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testNHParsingSpecialChars() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final String i0 = "(A!+=~QWERTY!@#$%^&*-,€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜˜˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±¹²³´µ¶·¸º»¼¿À÷þÿ)"; + final Phylogeny p0 = factory.create( i0, new NHXParser() )[ 0 ]; + if ( !p0.toNewHampshireX().equals( i0 ) ) { + System.out.println(); + System.out.println( p0.toNewHampshireX() ); + System.out.println( i0 ); + return false; + } + final String i1 = "(हिंदी,한글,ไทย,'Tiếng Việt',ひらがなカタカナ漢字,繁體字,русский)"; + final Phylogeny p1 = factory.create( i1, new NHXParser() )[ 0 ]; + if ( !p1.toNewHampshireX().equals( i1 ) ) { + System.out.println(); + System.out.println( p1.toNewHampshireX() ); + System.out.println( i1 ); return false; } } @@ -7459,6 +9698,67 @@ public final class Test { if ( p.next() != null ) { return false; } + // + final String p30_str = "(A,B);(C,D)"; + final NHXParser p30 = new NHXParser(); + p30.setSource( p30_str ); + if ( !p30.hasNext() ) { + return false; + } + Phylogeny phy30 = p30.next(); + if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { + System.out.println( phy30.toNewHampshire() ); + return false; + } + if ( !p30.hasNext() ) { + return false; + } + Phylogeny phy301 = p30.next(); + if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { + System.out.println( phy301.toNewHampshire() ); + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + p30.reset(); + if ( !p30.hasNext() ) { + return false; + } + phy30 = p30.next(); + if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { + System.out.println( phy30.toNewHampshire() ); + return false; + } + if ( !p30.hasNext() ) { + return false; + } + phy301 = p30.next(); + if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { + System.out.println( phy301.toNewHampshire() ); + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + if ( p30.next() != null ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7496,6 +9796,14 @@ public final class Test { System.out.println( n6.toNewHampshireX() ); return false; } + final PhylogenyNode n7 = new PhylogenyNode(); + n7.setName( " gks:dr-m4 \" ' `@:[]sadq04 " ); + if ( !n7.toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + .equals( "'gks:dr-m4 \" ` `@:[]sadq04'" ) ) { + System.out.println( n7 + .toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7743,8 +10051,8 @@ public final class Test { return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n13.getName().equals( "blah_12345/1-2" ) ) { + .createInstanceFromNhxString( "BLAH_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n13.getName().equals( "BLAH_12345/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n13 ).equals( "12345" ) ) { @@ -7809,7 +10117,8 @@ public final class Test { return false; } final PhylogenyNode n19 = PhylogenyNode - .createInstanceFromNhxString( "blah_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAH_1-roejojoej", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { return false; } @@ -7817,7 +10126,7 @@ public final class Test { return false; } final PhylogenyNode n30 = PhylogenyNode - .createInstanceFromNhxString( "blah_1234567-roejojoej", + .createInstanceFromNhxString( "BLAH_1234567-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) { return false; @@ -7826,7 +10135,7 @@ public final class Test { return false; } final PhylogenyNode n31 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345678-roejojoej", + .createInstanceFromNhxString( "BLAH_12345678-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n31.getNodeData().isHasTaxonomy() ) { return false; @@ -7837,7 +10146,7 @@ public final class Test { return false; } final PhylogenyNode n40 = PhylogenyNode - .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BCL2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { return false; } @@ -7869,10 +10178,184 @@ public final class Test { return true; } + private static boolean testNHXNodeParsing2() { + try { + final PhylogenyNode n0_0 = PhylogenyNode.createInstanceFromNhxString( "n0:[ignore me 123]:1E-3", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n0_0.getName().equals( "n0" ) ) { + return false; + } + if ( !isEqual( n0_0.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n0_1 = PhylogenyNode.createInstanceFromNhxString( "n0[ignore me 123]:1E-3", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n0_1.getName().equals( "n0" ) ) { + return false; + } + if ( !isEqual( n0_1.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n0_2 = PhylogenyNode.createInstanceFromNhxString( "n0:1E-3[ignore me 123]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n0_2.getName().equals( "n0" ) ) { + return false; + } + if ( !isEqual( n0_2.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n0_3 = PhylogenyNode.createInstanceFromNhxString( "n0:1E-3:[ignore me 123]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n0_3.getName().equals( "n0" ) ) { + return false; + } + if ( !isEqual( n0_3.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n0_4 = PhylogenyNode.createInstanceFromNhxString( "n0:0.001:[ignore me 123]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n0_4.getName().equals( "n0" ) ) { + return false; + } + if ( !isEqual( n0_4.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n1_0 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada[&!color=#FFFFFF]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_0.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( n1_0.getBranchData().getBranchColor().getValue().getGreen() != 255 ) { + return false; + } + final PhylogenyNode n1_1 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada[&!color=#FFFFFF]:0.001", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_1.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( n1_1.getBranchData().getBranchColor().getValue().getGreen() != 255 ) { + return false; + } + if ( !isEqual( n1_1.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n1_2 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada:0.001[&!color=#FFFFFF]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_2.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( n1_2.getBranchData().getBranchColor().getValue().getGreen() != 255 ) { + return false; + } + if ( !isEqual( n1_2.getDistanceToParent(), 0.001 ) ) { + return false; + } + final PhylogenyNode n1_3 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada:1e-3[&boostrap=69,&!color=#FFFFFF]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_3.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( n1_3.getBranchData().getBranchColor().getValue().getGreen() != 255 ) { + return false; + } + if ( !isEqual( n1_3.getDistanceToParent(), 0.001 ) ) { + return false; + } + if ( !isEqual( n1_3.getBranchData().getConfidence( 0 ).getValue(), 69 ) ) { + return false; + } + if ( !n1_3.getBranchData().getConfidence( 0 ).getType().equals( "bootstrap" ) ) { + return false; + } + final PhylogenyNode n1_4 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada[&bootstrap=69,&!colour=#FFFFFF]:1e-3", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_4.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( n1_4.getBranchData().getBranchColor().getValue().getGreen() != 255 ) { + return false; + } + if ( !isEqual( n1_4.getDistanceToParent(), 0.001 ) ) { + return false; + } + if ( !isEqual( n1_4.getBranchData().getConfidence( 0 ).getValue(), 69 ) ) { + return false; + } + if ( !n1_4.getBranchData().getConfidence( 0 ).getType().equals( "bootstrap" ) ) { + return false; + } + final PhylogenyNode n1_5 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada:1e-3[69.0]", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_5.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( !isEqual( n1_5.getDistanceToParent(), 0.001 ) ) { + return false; + } + if ( !isEqual( n1_5.getBranchData().getConfidence( 0 ).getValue(), 69 ) ) { + return false; + } + final PhylogenyNode n1_6 = PhylogenyNode + .createInstanceFromNhxString( "xyz|A/American_duck/NH/00321/|Duck|Canada[&prob=0.9500000000000000e+00,prob_stddev=0.1100000000000000e+00]:1e-3", + NHXParser.TAXONOMY_EXTRACTION.NO, + true, + true ); + if ( !n1_6.getName().equals( "xyz|A/American duck/NH/00321/|Duck|Canada" ) ) { + return false; + } + if ( !isEqual( n1_6.getDistanceToParent(), 0.001 ) ) { + return false; + } + if ( !isEqual( n1_6.getBranchData().getConfidence( 0 ).getStandardDeviation(), 0.11 ) ) { + return false; + } + if ( !isEqual( n1_6.getBranchData().getConfidence( 0 ).getValue(), 0.95 ) ) { + return false; + } + if ( !n1_6.getBranchData().getConfidence( 0 ).getType().equals( "posterior probability" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + private static boolean testNHXParsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p1 = factory.create( "(A [&&NHX:S=a_species],B1[&&NHX:S=b_species])", new NHXParser() )[ 0 ]; + final Phylogeny p1 = factory.create( "(A [&&NHX:S=a_species],B1[&&NHX:S=b_species])", + new NHXParser() )[ 0 ]; if ( !p1.toNewHampshireX().equals( "(A[&&NHX:S=a_species],B1[&&NHX:S=b_species])" ) ) { return false; } @@ -7886,21 +10369,21 @@ public final class Test { if ( !p2b[ 0 ].toNewHampshireX().equals( "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8" ) ) { return false; } - final Phylogeny[] p3 = factory - .create( "[ comment&&NHX,())))](((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq]", - new NHXParser() ); + final Phylogeny[] p3 = factory.create( + "[ comment&&NHX,())))](((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq]", + new NHXParser() ); if ( !p3[ 0 ].toNewHampshireX().equals( p2_S ) ) { return false; } - final Phylogeny[] p4 = factory - .create( "(((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(]", - new NHXParser() ); + final Phylogeny[] p4 = factory.create( + "(((((((A:0.2[&&NHX:S=qwerty]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=asdf]):0.4[&&NHX:S=zxc]):0.5[&&NHX:S=a]):0.6[&&NHX:S=asd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(]", + new NHXParser() ); if ( !p4[ 0 ].toNewHampshireX().equals( p2_S ) ) { return false; } - final Phylogeny[] p5 = factory - .create( "[] ( [][ ][ ] ([((( &&NHXcomment only![[[[[[]([]((((A:0.2[&&NHX:S=q[comment )))]werty][,,,,))]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=a[comment,,))]sdf])[comment(((]:0.4[&&NHX:S=zxc][comment(((][comment(((]):0.5[&&NHX:S=a]):0.6[&&NHX:S=a[comment(((]sd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(((]", - new NHXParser() ); + final Phylogeny[] p5 = factory.create( + "[] ( [][ ][ ] ([((( &&NHXcomment only![[[[[[]([]((((A:0.2[&&NHX:S=q[comment )))]werty][,,,,))]):0.2[&&NHX:S=uiop]):0.3[&&NHX:S=a[comment,,))]sdf])[comment(((]:0.4[&&NHX:S=zxc][comment(((][comment(((]):0.5[&&NHX:S=a]):0.6[&&NHX:S=a[comment(((]sd]):0.7[&&NHX:S=za]):0.8[&&NHX:S=zaq][comment(((]", + new NHXParser() ); if ( !p5[ 0 ].toNewHampshireX().equals( p2_S ) ) { return false; } @@ -7926,12 +10409,23 @@ public final class Test { if ( !p9.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { return false; } - final Phylogeny p10 = factory - .create( " [79] ( (A [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]", - new NHXParser() )[ 0 ]; + final Phylogeny p10 = factory.create( + " [79] ( (A [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]", + new NHXParser() )[ 0 ]; if ( !p10.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { return false; } + final Phylogeny p11 = factory.create( + " [79] ( ('A: \" ' [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]", + new NHXParser() )[ 0 ]; + if ( !p11.toNewHampshireX().equals( "(('A: \"':0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { + return false; + } + final Phylogeny p12 = factory.create( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]", + new NHXParser() )[ 0 ]; + if ( !p12.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7970,15 +10464,14 @@ public final class Test { } final Phylogeny p2 = factory .create( "(1[something_else(?)s,prob=0.9500000000000000e+00{}(((,p)rob_stddev=0.110000000000e+00," - + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," - + "prob+-sd=\"100+-0\"]:4.129000000000000e-02[&length_mean=4.153987461671767e-02," - + "length_median=4.129000000000000e-02,length_95%HPD={3.217800000000000e-02," - + "5.026800000000000e-02}],2[&prob=0.810000000000000e+00,prob_stddev=0.000000000000000e+00," - + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," - + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," - + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," - + "7.369400000000000e-02}])", - new NHXParser() )[ 0 ]; + + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + + "prob+-sd=\"100+-0\"]:4.129000000000000e-02[&length_mean=4.153987461671767e-02," + + "length_median=4.129000000000000e-02,length_95%HPD={3.217800000000000e-02," + + "5.026800000000000e-02}],2[&prob=0.810000000000000e+00,prob_stddev=0.000000000000000e+00," + + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," + + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," + + "7.369400000000000e-02}])", new NHXParser() )[ 0 ]; if ( p2.getNode( "1" ) == null ) { return false; } @@ -8022,13 +10515,13 @@ public final class Test { if ( phy.getNodes( "'single quotes' inside double quotes" ).size() != 1 ) { return false; } - if ( phy.getNodes( "double quotes inside single quotes" ).size() != 1 ) { + if ( phy.getNodes( "\"double quotes\" inside single quotes" ).size() != 1 ) { return false; } if ( phy.getNodes( "noquotes" ).size() != 1 ) { return false; } - if ( phy.getNodes( "A ( B C '" ).size() != 1 ) { + if ( phy.getNodes( "A ( B C '" ).size() != 1 ) { return false; } final NHXParser p1p = new NHXParser(); @@ -8055,10 +10548,10 @@ public final class Test { if ( !p4.toNewHampshire().equals( "('A)','B(),; x');" ) ) { return false; } - final Phylogeny p10 = factory - .create( " [79] ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool, was! ) ':0.1[100] [comment]", - new NHXParser() )[ 0 ]; - final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; + final Phylogeny p10 = factory.create( + " [79] ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool, was! ) ':0.1[100] [comment]", + new NHXParser() )[ 0 ]; + final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; if ( !p10.toNewHampshireX().equals( p10_clean_str ) ) { return false; } @@ -8066,11 +10559,10 @@ public final class Test { if ( !p11.toNewHampshireX().equals( p10_clean_str ) ) { return false; } - // - final Phylogeny p12 = factory - .create( " [79] ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]", - new NHXParser() )[ 0 ]; - final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; + final Phylogeny p12 = factory.create( + " [79] ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]", + new NHXParser() )[ 0 ]; + final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; if ( !p12.toNewHampshireX().equals( p12_clean_str ) ) { return false; } @@ -8078,7 +10570,7 @@ public final class Test { if ( !p13.toNewHampshireX().equals( p12_clean_str ) ) { return false; } - final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;"; + final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;"; if ( !p13.toNewHampshire().equals( p12_clean_str_nh ) ) { return false; } @@ -8173,7 +10665,7 @@ public final class Test { // Do nothing -- means were not running from jar. } if ( xml_parser == null ) { - xml_parser = new PhyloXmlParser(); + xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } @@ -8181,8 +10673,8 @@ public final class Test { xml_parser.setValidateAgainstSchema( PHYLOXML_REMOTE_XSD ); } } - final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml", - xml_parser ); + final Phylogeny[] phylogenies_0 = factory + .create( new File( Test.PATH_TO_TEST_DATA + "phyloxml_distribution.xml" ), xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { System.out.println( xml_parser.getErrorMessages().toString() ); return false; @@ -8481,7 +10973,8 @@ public final class Test { for( it0.reset(); it0.hasNext(); ) { it0.next(); } - final Phylogeny t1 = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r", new NHXParser() )[ 0 ]; + final Phylogeny t1 = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r", + new NHXParser() )[ 0 ]; final PhylogenyNodeIterator it = t1.iteratorPostorder(); if ( !it.next().getName().equals( "A" ) ) { return false; @@ -8575,7 +11068,8 @@ public final class Test { if ( it.hasNext() ) { return false; } - final Phylogeny t1 = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r", new NHXParser() )[ 0 ]; + final Phylogeny t1 = factory.create( "(((A,B)ab,(C,D)cd)abcd,((E,F)ef,(G,H)gh)efgh)r", + new NHXParser() )[ 0 ]; it = t1.iteratorPreorder(); if ( !it.next().getName().equals( "r" ) ) { return false; @@ -8635,7 +11129,7 @@ public final class Test { private static boolean testPropertiesMap() { try { - final PropertiesMap pm = new PropertiesMap(); + final PropertiesList pm = new PropertiesList(); final Property p0 = new Property( "dimensions:diameter", "1", "metric:mm", "xsd:decimal", AppliesTo.NODE ); final Property p1 = new Property( "dimensions:length", "2", "metric:mm", "xsd:decimal", AppliesTo.NODE ); final Property p2 = new Property( "something:else", @@ -8646,10 +11140,10 @@ public final class Test { pm.addProperty( p0 ); pm.addProperty( p1 ); pm.addProperty( p2 ); - if ( !pm.getProperty( "dimensions:diameter" ).getValue().equals( "1" ) ) { + if ( !pm.getProperties( "dimensions:diameter" ).get( 0 ).getValue().equals( "1" ) ) { return false; } - if ( !pm.getProperty( "dimensions:length" ).getValue().equals( "2" ) ) { + if ( !pm.getProperties( "dimensions:length" ).get( 0 ).getValue().equals( "2" ) ) { return false; } if ( pm.getProperties().size() != 3 ) { @@ -8664,16 +11158,6 @@ public final class Test { if ( pm.getProperties().size() != 3 ) { return false; } - pm.removeProperty( "dimensions:diameter" ); - if ( pm.getProperties().size() != 2 ) { - return false; - } - if ( pm.getPropertiesWithGivenReferencePrefix( "dimensions" ).size() != 1 ) { - return false; - } - if ( pm.getPropertiesWithGivenReferencePrefix( "something" ).size() != 1 ) { - return false; - } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -9003,12 +11487,12 @@ public final class Test { if ( !gene1.getRoot().isDuplication() ) { return false; } - final Phylogeny species2 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", - new NHXParser() )[ 0 ]; - final Phylogeny gene2 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=B])ab,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", - new NHXParser() )[ 0 ]; + final Phylogeny species2 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", + new NHXParser() )[ 0 ]; + final Phylogeny gene2 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=B])ab,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", + new NHXParser() )[ 0 ]; species2.setRooted( true ); gene2.setRooted( true ); final SDI sdi2 = new SDI( gene2, species2 ); @@ -9033,12 +11517,12 @@ public final class Test { if ( !gene2.getNode( "r" ).isHasAssignedEvent() ) { return false; } - final Phylogeny species3 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", - new NHXParser() )[ 0 ]; - final Phylogeny gene3 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=A])aa,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", - new NHXParser() )[ 0 ]; + final Phylogeny species3 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", + new NHXParser() )[ 0 ]; + final Phylogeny gene3 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=A])aa,[&&NHX:S=C])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", + new NHXParser() )[ 0 ]; species3.setRooted( true ); gene3.setRooted( true ); final SDI sdi3 = new SDI( gene3, species3 ); @@ -9051,12 +11535,12 @@ public final class Test { if ( !gene3.getNode( "aa" ).isHasAssignedEvent() ) { return false; } - final Phylogeny species4 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", - new NHXParser() )[ 0 ]; - final Phylogeny gene4 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=C])ac,[&&NHX:S=B])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", - new NHXParser() )[ 0 ]; + final Phylogeny species4 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", + new NHXParser() )[ 0 ]; + final Phylogeny gene4 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=C])ac,[&&NHX:S=B])abc,[&&NHX:S=D])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", + new NHXParser() )[ 0 ]; species4.setRooted( true ); gene4.setRooted( true ); final SDI sdi4 = new SDI( gene4, species4 ); @@ -9078,12 +11562,12 @@ public final class Test { if ( gene4.getNumberOfExternalNodes() != 6 ) { return false; } - final Phylogeny species5 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", - new NHXParser() )[ 0 ]; - final Phylogeny gene5 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=D])ad,[&&NHX:S=C])adc,[&&NHX:S=B])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", - new NHXParser() )[ 0 ]; + final Phylogeny species5 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", + new NHXParser() )[ 0 ]; + final Phylogeny gene5 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=D])ad,[&&NHX:S=C])adc,[&&NHX:S=B])abcd,([&&NHX:S=E],[&&NHX:S=F])ef)r", + new NHXParser() )[ 0 ]; species5.setRooted( true ); gene5.setRooted( true ); final SDI sdi5 = new SDI( gene5, species5 ); @@ -9108,15 +11592,15 @@ public final class Test { // Trees from Louxin Zhang 1997 "On a Mirkin-Muchnik-Smith // Conjecture for Comparing Molecular Phylogenies" // J. of Comput Bio. Vol. 4, No 2, pp.177-187 - final Phylogeny species6 = factory - .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," - + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", - new NHXParser() )[ 0 ]; - final Phylogeny gene6 = factory - .create( "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1,3:0.1[&&NHX:S=3])1-2-3:0.1," - + "((4:0.1[&&NHX:S=4],(5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.1)4-5-6:0.1," - + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],9:0.1[&&NHX:S=9])8-9:0.1)7-8-9:0.1)4-5-6-7-8-9:0.1)r;", - new NHXParser() )[ 0 ]; + final Phylogeny species6 = factory.create( + "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," + + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", + new NHXParser() )[ 0 ]; + final Phylogeny gene6 = factory.create( + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1,3:0.1[&&NHX:S=3])1-2-3:0.1," + + "((4:0.1[&&NHX:S=4],(5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.1)4-5-6:0.1," + + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8],9:0.1[&&NHX:S=9])8-9:0.1)7-8-9:0.1)4-5-6-7-8-9:0.1)r;", + new NHXParser() )[ 0 ]; species6.setRooted( true ); gene6.setRooted( true ); final SDI sdi6 = new SDI( gene6, species6 ); @@ -9238,7 +11722,8 @@ public final class Test { private static boolean testSDIunrooted() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p0 = factory.create( "((((A,B)ab,(C1,C2)cc)abc,D)abcd,(E,F)ef)abcdef", new NHXParser() )[ 0 ]; + final Phylogeny p0 = factory.create( "((((A,B)ab,(C1,C2)cc)abc,D)abcd,(E,F)ef)abcdef", + new NHXParser() )[ 0 ]; final List l = SDIR.getBranchesInPreorder( p0 ); final Iterator iter = l.iterator(); PhylogenyBranch br = iter.next(); @@ -9403,12 +11888,12 @@ public final class Test { if ( iter2.hasNext() ) { return false; } - final Phylogeny species0 = factory - .create( "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", - new NHXParser() )[ 0 ]; - final Phylogeny gene1 = factory - .create( "(((((A:0.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])", - new NHXParser() )[ 0 ]; + final Phylogeny species0 = factory.create( + "(((([&&NHX:S=A],[&&NHX:S=B]),[&&NHX:S=C]),[&&NHX:S=D]),([&&NHX:S=E],[&&NHX:S=F]))", + new NHXParser() )[ 0 ]; + final Phylogeny gene1 = factory.create( + "(((((A:0.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])", + new NHXParser() )[ 0 ]; species0.setRooted( true ); gene1.setRooted( true ); final SDIR sdi_unrooted = new SDIR(); @@ -9428,9 +11913,9 @@ public final class Test { if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) { return false; } - final Phylogeny gene2 = factory - .create( "(((((A:2.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])", - new NHXParser() )[ 0 ]; + final Phylogeny gene2 = factory.create( + "(((((A:2.6[&&NHX:S=A],B:0.1[&&NHX:S=B])ab:0.1,C:0.1[&&NHX:S=C])abc:0.3,D:1.0[&&NHX:S=D])abcd:0.2,E:0.1[&&NHX:S=E])abcde:0.2,F:0.2[&&NHX:S=F])", + new NHXParser() )[ 0 ]; gene2.setRooted( true ); sdi_unrooted.infer( gene2, species0, false, false, true, true, 10 ); if ( sdi_unrooted.getCount() != 1 ) { @@ -9448,17 +11933,17 @@ public final class Test { if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) { return false; } - final Phylogeny species6 = factory - .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," - + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", - new NHXParser() )[ 0 ]; - final Phylogeny gene6 = factory - .create( "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4]," - + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2]," - + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8]," - + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])" - + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])", - new NHXParser() )[ 0 ]; + final Phylogeny species6 = factory.create( + "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," + + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", + new NHXParser() )[ 0 ]; + final Phylogeny gene6 = factory.create( + "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4]," + + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2]," + + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8]," + + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])" + + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])", + new NHXParser() )[ 0 ]; species6.setRooted( true ); gene6.setRooted( true ); Phylogeny[] p6 = sdi_unrooted.infer( gene6, species6, false, true, true, true, 10 ); @@ -9486,133 +11971,318 @@ public final class Test { if ( !p6[ 0 ].getNode( "7-8-9" ).isDuplication() ) { return false; } - if ( p6[ 0 ].getNode( "1-2" ).isDuplication() ) { + if ( p6[ 0 ].getNode( "1-2" ).isDuplication() ) { + return false; + } + if ( p6[ 0 ].getNode( "1-2-3" ).isDuplication() ) { + return false; + } + if ( p6[ 0 ].getNode( "5-6" ).isDuplication() ) { + return false; + } + if ( p6[ 0 ].getNode( "8-9" ).isDuplication() ) { + return false; + } + if ( p6[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { + return false; + } + p6 = null; + final Phylogeny species7 = factory.create( + "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," + + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", + new NHXParser() )[ 0 ]; + final Phylogeny gene7 = factory.create( + "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4]," + + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2]," + + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8]," + + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])" + + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])", + new NHXParser() )[ 0 ]; + species7.setRooted( true ); + gene7.setRooted( true ); + Phylogeny[] p7 = sdi_unrooted.infer( gene7, species7, true, true, true, true, 10 ); + if ( sdi_unrooted.getCount() != 1 ) { + return false; + } + if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) { + return false; + } + if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) { + return false; + } + if ( sdi_unrooted.getMinimalDuplications() != 3 ) { + return false; + } + if ( sdi_unrooted.getMinimalMappingCost() != 17 ) { + return false; + } + if ( !p7[ 0 ].getRoot().isDuplication() ) { + return false; + } + if ( !p7[ 0 ].getNode( "4-5-6" ).isDuplication() ) { + return false; + } + if ( !p7[ 0 ].getNode( "7-8-9" ).isDuplication() ) { + return false; + } + if ( p7[ 0 ].getNode( "1-2" ).isDuplication() ) { + return false; + } + if ( p7[ 0 ].getNode( "1-2-3" ).isDuplication() ) { + return false; + } + if ( p7[ 0 ].getNode( "5-6" ).isDuplication() ) { + return false; + } + if ( p7[ 0 ].getNode( "8-9" ).isDuplication() ) { + return false; + } + if ( p7[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { + return false; + } + p7 = null; + final Phylogeny species8 = factory.create( + "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," + + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", + new NHXParser() )[ 0 ]; + final Phylogeny gene8 = factory.create( + "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4]," + + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2]," + + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8]," + + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])" + + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])", + new NHXParser() )[ 0 ]; + species8.setRooted( true ); + gene8.setRooted( true ); + Phylogeny[] p8 = sdi_unrooted.infer( gene8, species8, false, false, true, true, 10 ); + if ( sdi_unrooted.getCount() != 1 ) { + return false; + } + if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) { + return false; + } + if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) { + return false; + } + if ( sdi_unrooted.getMinimalDuplications() != 3 ) { + return false; + } + if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) { + return false; + } + if ( !p8[ 0 ].getRoot().isDuplication() ) { + return false; + } + if ( !p8[ 0 ].getNode( "4-5-6" ).isDuplication() ) { + return false; + } + if ( !p8[ 0 ].getNode( "7-8-9" ).isDuplication() ) { + return false; + } + if ( p8[ 0 ].getNode( "1-2" ).isDuplication() ) { + return false; + } + if ( p8[ 0 ].getNode( "1-2-3" ).isDuplication() ) { + return false; + } + if ( p8[ 0 ].getNode( "5-6" ).isDuplication() ) { + return false; + } + if ( p8[ 0 ].getNode( "8-9" ).isDuplication() ) { return false; } - if ( p6[ 0 ].getNode( "1-2-3" ).isDuplication() ) { + if ( p8[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { return false; } - if ( p6[ 0 ].getNode( "5-6" ).isDuplication() ) { + p8 = null; + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testSequenceDbWsTools1() { + try { + final PhylogenyNode n = new PhylogenyNode(); + n.setName( "NP_001025424" ); + Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - if ( p6[ 0 ].getNode( "8-9" ).isDuplication() ) { + n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - if ( p6[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { + n.setName( "NP_001025424.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - p6 = null; - final Phylogeny species7 = factory - .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," - + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", - new NHXParser() )[ 0 ]; - final Phylogeny gene7 = factory - .create( "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4]," - + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2]," - + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8]," - + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])" - + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])", - new NHXParser() )[ 0 ]; - species7.setRooted( true ); - gene7.setRooted( true ); - Phylogeny[] p7 = sdi_unrooted.infer( gene7, species7, true, true, true, true, 10 ); - if ( sdi_unrooted.getCount() != 1 ) { + n.setName( "NM_001030253" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NM_001030253" ) ) { return false; } - if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) { + n.setName( "BCL2_HUMAN" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "BCL2_HUMAN" ) ) { + System.out.println( acc.toString() ); return false; } - if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) { + n.setName( "P10415" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); return false; } - if ( sdi_unrooted.getMinimalDuplications() != 3 ) { + n.setName( " P10415 " ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); return false; } - if ( sdi_unrooted.getMinimalMappingCost() != 17 ) { + n.setName( "_P10415|" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); return false; } - if ( !p7[ 0 ].getRoot().isDuplication() ) { + n.setName( "AY695820" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); return false; } - if ( !p7[ 0 ].getNode( "4-5-6" ).isDuplication() ) { + n.setName( "_AY695820_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); return false; } - if ( !p7[ 0 ].getNode( "7-8-9" ).isDuplication() ) { + n.setName( "AAA59452" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); return false; } - if ( p7[ 0 ].getNode( "1-2" ).isDuplication() ) { + n.setName( "_AAA59452_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); return false; } - if ( p7[ 0 ].getNode( "1-2-3" ).isDuplication() ) { + n.setName( "AAA59452.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); return false; } - if ( p7[ 0 ].getNode( "5-6" ).isDuplication() ) { + n.setName( "_AAA59452.1_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); return false; } - if ( p7[ 0 ].getNode( "8-9" ).isDuplication() ) { + n.setName( "GI:94894583" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "94894583" ) ) { + System.out.println( acc.toString() ); return false; } - if ( p7[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { + n.setName( "gi|71845847|1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "71845847" ) ) { + System.out.println( acc.toString() ); return false; } - p7 = null; - final Phylogeny species8 = factory - .create( "(((1:[&&NHX:S=1],5:[&&NHX:S=5])1-5,((4:[&&NHX:S=4],6:[&&NHX:S=6])4-6,2:[&&NHX:S=2])4-6-2)1-5-4-6-2," - + "((9:[&&NHX:S=9],3:[&&NHX:S=3])9-3,(8:[&&NHX:S=8],7:[&&NHX:S=7])8-7)9-3-8-7)", - new NHXParser() )[ 0 ]; - final Phylogeny gene8 = factory - .create( "((5:0.1[&&NHX:S=5],6:0.1[&&NHX:S=6])5-6:0.05[&&NHX:S=6],(4:0.1[&&NHX:S=4]," - + "(((1:0.1[&&NHX:S=1],2:0.1[&&NHX:S=2])1-2:0.1[&&NHX:S=2],3:0.25[&&NHX:S=3])1-2-3:0.2[&&NHX:S=2]," - + "(7:0.1[&&NHX:S=7],(8:0.1[&&NHX:S=8]," - + "9:0.1[&&NHX:S=9])8-9:0.1[&&NHX:S=9])7-8-9:0.1[&&NHX:S=8])" - + "4-5-6-7-8-9:0.1[&&NHX:S=5])4-5-6:0.05[&&NHX:S=5])", - new NHXParser() )[ 0 ]; - species8.setRooted( true ); - gene8.setRooted( true ); - Phylogeny[] p8 = sdi_unrooted.infer( gene8, species8, false, false, true, true, 10 ); - if ( sdi_unrooted.getCount() != 1 ) { + n.setName( "gi|71845847|gb|AAZ45343.1| 1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAZ45343.1" ) ) { + System.out.println( acc.toString() ); return false; } - if ( !Test.isEqual( sdi_unrooted.getMinimalDiffInSubTreeHeights(), 0.0 ) ) { + } + catch ( final Exception e ) { + return false; + } + return true; + } + + private static boolean testSequenceDbWsTools2() { + try { + final PhylogenyNode n1 = new PhylogenyNode( "NP_001025424" ); + SequenceDbWsTools.obtainSeqInformation( n1 ); + if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { return false; } - if ( !Test.isEqual( sdi_unrooted.getMinimalTreeHeight(), 0.375 ) ) { + if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { return false; } - if ( sdi_unrooted.getMinimalDuplications() != 3 ) { + if ( !n1.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { return false; } - if ( sdi_unrooted.getMinimalMappingCost() != Integer.MAX_VALUE ) { + if ( !n1.getNodeData().getSequence().getAccession().getValue().equals( "NP_001025424" ) ) { return false; } - if ( !p8[ 0 ].getRoot().isDuplication() ) { + final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" ); + SequenceDbWsTools.obtainSeqInformation( n2 ); + if ( !n2.getNodeData().getSequence().getName() + .equals( "Danio rerio B-cell CLL/lymphoma 2a (bcl2a), mRNA" ) ) { return false; } - if ( !p8[ 0 ].getNode( "4-5-6" ).isDuplication() ) { + if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { return false; } - if ( !p8[ 0 ].getNode( "7-8-9" ).isDuplication() ) { + if ( !n2.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { return false; } - if ( p8[ 0 ].getNode( "1-2" ).isDuplication() ) { + if ( !n2.getNodeData().getSequence().getAccession().getValue().equals( "NM_001030253" ) ) { return false; } - if ( p8[ 0 ].getNode( "1-2-3" ).isDuplication() ) { + final PhylogenyNode n3 = new PhylogenyNode( "NM_184234.2" ); + SequenceDbWsTools.obtainSeqInformation( n3 ); + if ( !n3.getNodeData().getSequence().getName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { return false; } - if ( p8[ 0 ].getNode( "5-6" ).isDuplication() ) { + if ( !n3.getNodeData().getTaxonomy().getScientificName().equals( "Homo sapiens" ) ) { return false; } - if ( p8[ 0 ].getNode( "8-9" ).isDuplication() ) { + if ( !n3.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { return false; } - if ( p8[ 0 ].getNode( "4-5-6-7-8-9" ).isDuplication() ) { + if ( !n3.getNodeData().getSequence().getAccession().getValue().equals( "NM_184234" ) ) { return false; } - p8 = null; } - catch ( final Exception e ) { + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); e.printStackTrace( System.out ); + return true; + } + catch ( final Exception e ) { + e.printStackTrace(); return false; } return true; @@ -9620,123 +12290,135 @@ public final class Test { private static boolean testSequenceIdParsing() { try { - Identifier id = SequenceIdParser.parse( "gb_ADF31344_segmented_worms_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + Accession id = SequenceAccessionTools.parseAccessorFromString( "gb_ADF31344_segmented_worms_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "segmented worms|gb_ADF31344" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms|gb_ADF31344" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "segmented worms gb_ADF31344 and more" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms gb_ADF31344 and more" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } + return false; + } + id = SequenceAccessionTools.parseAccessorFromString( "gb_AAA96518_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "AAA96518" ) || !id.getSource().equals( "ncbi" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + } + return false; + } + id = SequenceAccessionTools.parseAccessorFromString( "gb_EHB07727_1_rodents_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "EHB07727" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "gb_AAA96518_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "AAA96518" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "dbj_BAF37827_1_turtles_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "BAF37827" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "gb_EHB07727_1_rodents_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "EHB07727" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "emb_CAA73223_1_primates_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "CAA73223" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "dbj_BAF37827_1_turtles_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "BAF37827" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites|ref_XP_002434188_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "emb_CAA73223_1_primates_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "CAA73223" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites_ref_XP_002434188_1_bla_XP_12345" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "mites|ref_XP_002434188_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "P4A123" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "uniprot" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "mites_ref_XP_002434188_1_bla_XP_12345" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" ); + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getSource() ); + return false; + } + id = SequenceAccessionTools.parseAccessorFromString( "N3B004Z009" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "N3B004Z009" ) || !id.getSource().equals( "uniprot" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "P4A123" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "A4CAA4ZBB9" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "A4CAA4ZBB9" ) || !id.getSource().equals( "uniprot" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "ecoli_A4CAA4ZBB9_rt" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "A4CAA4ZBB9" ) || !id.getSource().equals( "uniprot" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "XP_12345" ); + id = SequenceAccessionTools.parseAccessorFromString( "Q4CAA4ZBB9" ); if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); return false; } - // lcl_91970_unknown_ } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -9908,14 +12590,12 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); @@ -9925,7 +12605,6 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); @@ -9933,7 +12612,6 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); @@ -9942,14 +12620,12 @@ public final class Test { if ( !s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); @@ -9958,7 +12634,6 @@ public final class Test { if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); @@ -9968,7 +12643,6 @@ public final class Test { if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); @@ -9976,49 +12650,42 @@ public final class Test { if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) ); if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) ); @@ -10026,7 +12693,6 @@ public final class Test { if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) ); @@ -10034,7 +12700,6 @@ public final class Test { if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); @@ -10042,7 +12707,6 @@ public final class Test { if ( s0.match( query_nodes ) ) { return false; } - // query_nodes = new HashSet(); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) ); query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) ); @@ -10531,23 +13195,15 @@ public final class Test { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny t0_1 = factory.create( "(((A,B),C),(D,E))", new NHXParser() )[ 0 ]; final Phylogeny[] phylogenies_1 = factory.create( "(((A,B),C),(D,E)) " + "(((C,B),A),(D,E))" - + "(((A,B),C),(D,E)) " + "(((A,B),C),(D,E))" - + "(((A,B),C),(D,E))" + "(((C,B),A),(D,E))" - + "(((E,B),D),(C,A))" + "(((C,B),A),(D,E))" - + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))", + + "(((A,B),C),(D,E)) " + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))" + "(((C,B),A),(D,E))" + + "(((E,B),D),(C,A))" + "(((C,B),A),(D,E))" + "(((A,B),C),(D,E))" + "(((A,B),C),(D,E))", new NHXParser() ); SupportCount.count( t0_1, phylogenies_1, true, false ); final Phylogeny t0_2 = factory.create( "(((((A,B),C),D),E),(F,G))", new NHXParser() )[ 0 ]; final Phylogeny[] phylogenies_2 = factory.create( "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),((F,G),X))" - + "(((((A,Y),B),C),D),((F,G),E))" - + "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),(F,G))" - + "(((((A,B),C),D),E),(F,G),Z)" - + "(((((A,B),C),D),E),(F,G))" - + "((((((A,B),C),D),E),F),G)" - + "(((((X,Y),F,G),E),((A,B),C)),D)", + + "(((((A,B),C),D),E),((F,G),X))" + "(((((A,Y),B),C),D),((F,G),E))" + "(((((A,B),C),D),E),(F,G))" + + "(((((A,B),C),D),E),(F,G))" + "(((((A,B),C),D),E),(F,G))" + "(((((A,B),C),D),E),(F,G),Z)" + + "(((((A,B),C),D),E),(F,G))" + "((((((A,B),C),D),E),F),G)" + "(((((X,Y),F,G),E),((A,B),C)),D)", new NHXParser() ); SupportCount.count( t0_2, phylogenies_2, true, false ); final PhylogenyNodeIterator it = t0_2.iteratorPostorder(); @@ -10593,8 +13249,8 @@ public final class Test { return false; } final Phylogeny t0_4 = factory.create( "(((((A,B)1,C)2,D)3,E)4,F)", new NHXParser() )[ 0 ]; - final Phylogeny[] phylogenies_4 = factory.create( "((((((A,X),C),B),D),E),F) " - + "(((A,B,Z),C,Q),(((D,Y),E),F))", new NHXParser() ); + final Phylogeny[] phylogenies_4 = factory + .create( "((((((A,X),C),B),D),E),F) " + "(((A,B,Z),C,Q),(((D,Y),E),F))", new NHXParser() ); SupportCount.count( t0_4, phylogenies_4, true, false ); t0_4.reRoot( t0_4.getNode( "F" ).getId() ); if ( PhylogenyMethods.getConfidenceValue( t0_4.getNode( "1" ) ) != 1 ) { @@ -10664,8 +13320,9 @@ public final class Test { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final Phylogeny p1 = factory.create( "(((A,B)ab:97,C)abc:57,((D,E)de:10,(F,G)fg:50,(H,I)hi:64)defghi)", new NHXParser() )[ 0 ]; - final Phylogeny p2 = factory - .create( "(((A:0.1,B:0.3)ab:0.4,C)abc:0.5,((D,E)de,(F,G)fg,(H,I)hi:0.59)defghi)", new NHXParser() )[ 0 ]; + final Phylogeny p2 = factory.create( + "(((A:0.1,B:0.3)ab:0.4,C)abc:0.5,((D,E)de,(F,G)fg,(H,I)hi:0.59)defghi)", + new NHXParser() )[ 0 ]; if ( PhylogenyMethods.getConfidenceValue( p2.getNode( "ab" ) ) >= 0.0 ) { return false; } @@ -10725,7 +13382,7 @@ public final class Test { return false; } final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n3.toString() ); return false; @@ -10743,66 +13400,170 @@ public final class Test { return false; } final PhylogenyNode n6 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG-12345-blag", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n6.getNodeData().isHasTaxonomy() ) { System.out.println( n6.toString() ); return false; } final PhylogenyNode n7 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BL-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n7.getNodeData().isHasTaxonomy() ) { System.out.println( n7.toString() ); return false; } final PhylogenyNode n8 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_12345-blag", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n8.toString() ); return false; } final PhylogenyNode n9 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_12345/blag", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n9.toString() ); return false; } final PhylogenyNode n10x = PhylogenyNode - .createInstanceFromNhxString( "blag_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG!_12X45-blag", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10x.getNodeData().isHasTaxonomy() ) { System.out.println( n10x.toString() ); return false; } final PhylogenyNode n10xx = PhylogenyNode - .createInstanceFromNhxString( "blag_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG!_1YX45-blag", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10xx.getNodeData().isHasTaxonomy() ) { System.out.println( n10xx.toString() ); return false; } final PhylogenyNode n10 = PhylogenyNode - .createInstanceFromNhxString( "blag_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_9YX45-blag", + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { System.out.println( n10.toString() ); return false; } + final PhylogenyNode n10v = PhylogenyNode + .createInstanceFromNhxString( "BLAGG_BPM1-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n10v.getNodeData().getTaxonomy().getTaxonomyCode().equals( "BPM1" ) ) { + System.out.println( n10v.toString() ); + return false; + } + final PhylogenyNode n10v2 = PhylogenyNode + .createInstanceFromNhxString( "BLAGG_ABV-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n10v2.getNodeData().getTaxonomy().getTaxonomyCode().equals( "ABV" ) ) { + System.out.println( n10v2.toString() ); + return false; + } final PhylogenyNode n11 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + .createInstanceFromNhxString( "BLAG@_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n11.toString() ); return false; } final PhylogenyNode n12 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus_musculus", + .createInstanceFromNhxString( "BLA_G_Mus_musculus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n12.toString() ); return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + .createInstanceFromNhxString( "BLAaG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( n13.getNodeData().isHasTaxonomy() ) { System.out.println( n13.toString() ); return false; } + final PhylogenyNode n14 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n14.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n14.toString() ); + return false; + } + final PhylogenyNode n15 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n15.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n15.toString() ); + return false; + } + final PhylogenyNode n16 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n16.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n16.toString() ); + return false; + } + final PhylogenyNode n17 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n17.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n17.toString() ); + return false; + } + final PhylogenyNode n18 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_musculus_392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n18.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n18.toString() ); + return false; + } + final PhylogenyNode n19 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_musculus_K392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n19.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n19.toString() ); + return false; + } + final PhylogenyNode n20 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus musculus 392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n20.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n20.toString() ); + return false; + } + final PhylogenyNode n21 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus musculus K392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n21.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n21.toString() ); + return false; + } + final PhylogenyNode n23 = PhylogenyNode + .createInstanceFromNhxString( "9EMVE_Nematostella_vectensis", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n23.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { + System.out.println( n23.toString() ); + return false; + } + final PhylogenyNode n24 = PhylogenyNode + .createInstanceFromNhxString( "9EMVE_Nematostella", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n24.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { + System.out.println( n24.toString() ); + return false; + } + // + final PhylogenyNode n25 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_vectensis_NEMVE", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n25.getNodeData().getTaxonomy().getTaxonomyCode().equals( "NEMVE" ) ) { + System.out.println( n25.toString() ); + return false; + } + final PhylogenyNode n26 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_vectensis_9EMVE", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n26.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { + System.out.println( n26.toString() ); + return false; + } + final PhylogenyNode n27 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_9EMVE", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n27.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { + System.out.println( n27.toString() ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -10811,6 +13572,41 @@ public final class Test { return true; } + private static boolean testTreeCopy() { + try { + final String str_0 = "((((a,b),c),d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=animals]"; + final Phylogeny t0 = Phylogeny.createInstanceFromNhxString( str_0 ); + final Phylogeny t1 = t0.copy(); + if ( !t1.toNewHampshireX().equals( t0.toNewHampshireX() ) ) { + return false; + } + if ( !t1.toNewHampshireX().equals( str_0 ) ) { + return false; + } + t0.deleteSubtree( t0.getNode( "c" ), true ); + t0.deleteSubtree( t0.getNode( "a" ), true ); + t0.getRoot().getNodeData().getTaxonomy().setScientificName( "metazoa" ); + t0.getNode( "b" ).setName( "Bee" ); + if ( !t0.toNewHampshireX().equals( "((Bee,d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=metazoa]" ) ) { + return false; + } + if ( !t1.toNewHampshireX().equals( str_0 ) ) { + return false; + } + t0.deleteSubtree( t0.getNode( "e" ), true ); + t0.deleteSubtree( t0.getNode( "Bee" ), true ); + t0.deleteSubtree( t0.getNode( "d" ), true ); + if ( !t1.toNewHampshireX().equals( str_0 ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + private static boolean testTreeMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -10838,10 +13634,79 @@ public final class Test { } return true; } + + private static boolean testPhylogenyMethods() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)r", new NHXParser() )[ 0 ]; + + if ( PhylogenyMethods.calculateLevel( t0.getNode( "A" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "B" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "ab" ) ) != 1 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "C" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "abc" ) ) != 2 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "D" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "abcd" ) ) != 3 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "E" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "r" ) ) != 4 ) { + return false; + } + final Phylogeny t1 = factory.create( "((((A,B)ab,C)abc,D)abcd,E,((((((X)1)2)3)4)5)6)r", new NHXParser() )[ 0 ]; + if ( PhylogenyMethods.calculateLevel( t1.getNode( "r" ) ) != 7 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "X" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "6" ) ) != 6 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "5" ) ) != 5 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "4" ) ) != 4 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "3" ) ) != 3 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "2" ) ) != 2 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "1" ) ) != 1 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "abcd" ) ) != 3 ) { + return false; + } + + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } private static boolean testUniprotEntryRetrieval() { try { - final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 200 ); + final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 5000 ); if ( !entry.getAccession().equals( "P12345" ) ) { return false; } @@ -10860,6 +13725,16 @@ public final class Test { if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; } + if ( entry.getMolecularSequence() == null ) { + return false; + } + if ( !entry.getMolecularSequence().getMolecularSequenceAsString() + .startsWith( "MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKMNLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV" ) + || !entry.getMolecularSequence().getMolecularSequenceAsString().endsWith( "LAHAIHQVTK" ) ) { + System.out.println( "got: " + entry.getMolecularSequence().getMolecularSequenceAsString() ); + System.out.println( "expected something else." ); + return false; + } } catch ( final IOException e ) { System.out.println(); @@ -10867,6 +13742,10 @@ public final class Test { e.printStackTrace( System.out ); return true; } + catch ( final NullPointerException f ) { + f.printStackTrace( System.out ); + return false; + } catch ( final Exception e ) { return false; } @@ -11056,60 +13935,4 @@ public final class Test { } return true; } - - private static boolean testWabiTxSearch() { - try { - String result = ""; - result = TxSearch.searchSimple( "nematostella" ); - result = TxSearch.getTxId( "nematostella" ); - if ( !result.equals( "45350" ) ) { - return false; - } - result = TxSearch.getTxName( "45350" ); - if ( !result.equals( "Nematostella" ) ) { - return false; - } - result = TxSearch.getTxId( "nematostella vectensis" ); - if ( !result.equals( "45351" ) ) { - return false; - } - result = TxSearch.getTxName( "45351" ); - if ( !result.equals( "Nematostella vectensis" ) ) { - return false; - } - result = TxSearch.getTxId( "Bacillus subtilis subsp. subtilis str. N170" ); - if ( !result.equals( "536089" ) ) { - return false; - } - result = TxSearch.getTxName( "536089" ); - if ( !result.equals( "Bacillus subtilis subsp. subtilis str. N170" ) ) { - return false; - } - final List queries = new ArrayList(); - queries.add( "Campylobacter coli" ); - queries.add( "Escherichia coli" ); - queries.add( "Arabidopsis" ); - queries.add( "Trichoplax" ); - queries.add( "Samanea saman" ); - queries.add( "Kluyveromyces marxianus" ); - queries.add( "Bacillus subtilis subsp. subtilis str. N170" ); - queries.add( "Bornavirus parrot/PDD/2008" ); - final List ranks = new ArrayList(); - ranks.add( RANKS.SUPERKINGDOM ); - ranks.add( RANKS.KINGDOM ); - ranks.add( RANKS.FAMILY ); - ranks.add( RANKS.GENUS ); - ranks.add( RANKS.TRIBE ); - result = TxSearch.searchLineage( queries, ranks ); - result = TxSearch.searchParam( "Homo sapiens", TAX_NAME_CLASS.ALL, TAX_RANK.SPECIES, 10, true ); - result = TxSearch.searchParam( "Samanea saman", TAX_NAME_CLASS.SCIENTIFIC_NAME, TAX_RANK.ALL, 10, true ); - } - catch ( final Exception e ) { - System.out.println(); - System.out.println( "the following might be due to absence internet connection:" ); - e.printStackTrace( System.out ); - return false; - } - return true; - } }