X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftest%2FTest.java;h=80a20b6939784166e02f09d83f4806c4c098bb8e;hb=6479c35c4734850f517a6ef8de0fce500fdd6693;hp=8f9341dbc491757f6dfee60b0a370a2c9c6b6111;hpb=cbc5c71b164a57b8ad6c988d015057c7f0972478;p=jalview.git diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 8f9341d..80a20b6 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -29,6 +29,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; @@ -36,9 +37,10 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Set; +import java.util.SortedSet; import org.forester.application.support_transfer; -import org.forester.archaeopteryx.AptxUtil; +import org.forester.archaeopteryx.TreePanelUtil; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; @@ -70,6 +72,7 @@ import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; @@ -91,7 +94,6 @@ import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; -import org.forester.protein.DomainId; import org.forester.protein.Protein; import org.forester.protein.ProteinId; import org.forester.rio.TestRIO; @@ -114,7 +116,7 @@ import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.GeneralTable; -import org.forester.util.SequenceIdParser; +import org.forester.util.SequenceAccessionTools; import org.forester.ws.seqdb.SequenceDatabaseEntry; import org.forester.ws.seqdb.SequenceDbWsTools; import org.forester.ws.seqdb.UniProtTaxonomy; @@ -126,20 +128,21 @@ import org.forester.ws.wabi.TxSearch.TAX_RANK; @SuppressWarnings( "unused") public final class Test { - private final static double ZERO_DIFF = 1.0E-9; - private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) - + ForesterUtil.getFileSeparator() + "test_data" - + ForesterUtil.getFileSeparator(); private final static String PATH_TO_RESOURCES = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "resources" + ForesterUtil.getFileSeparator(); - private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; - private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" + private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + + ForesterUtil.getFileSeparator() + "test_data" + + ForesterUtil.getFileSeparator(); + private final static boolean PERFORM_DB_TESTS = false; + private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; - private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + private static final String PHYLOXML_REMOTE_XSD = ForesterConstants.PHYLO_XML_LOCATION + "/" + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; + private final static boolean USE_LOCAL_PHYLOXML_SCHEMA = true; + private final static double ZERO_DIFF = 1.0E-9; public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF ); @@ -172,15 +175,15 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); - System.out.print( "Domain id: " ); - if ( !testDomainId() ) { - System.out.println( "failed." ); - failed++; + System.out.print( "Basic node methods: " ); + if ( Test.testBasicNodeMethods() ) { + System.out.println( "OK." ); + succeeded++; } else { - succeeded++; + System.out.println( "failed." ); + failed++; } - System.out.println( "OK." ); System.out.print( "Protein id: " ); if ( !testProteinId() ) { System.out.println( "failed." ); @@ -235,8 +238,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Hmmscan output parser: " ); - if ( testHmmscanOutputParser() ) { + System.out.print( "UniProtKB id extraction: " ); + if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { System.out.println( "OK." ); succeeded++; } @@ -244,8 +247,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Basic node methods: " ); - if ( Test.testBasicNodeMethods() ) { + System.out.print( "Sequence DB tools 1: " ); + if ( testSequenceDbWsTools1() ) { System.out.println( "OK." ); succeeded++; } @@ -253,8 +256,33 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Taxonomy code extraction: " ); - if ( Test.testExtractTaxonomyCodeFromNodeName() ) { + if ( PERFORM_DB_TESTS ) { + System.out.print( "Ebi Entry Retrieval: " ); + if ( Test.testEbiEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + } + // System.exit( 0 ); + if ( PERFORM_DB_TESTS ) { + System.out.print( "Sequence DB tools 2: " ); + if ( testSequenceDbWsTools2() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + System.exit( -1 ); + } + } + // System.exit( 0 ); + System.out.print( "Hmmscan output parser: " ); + if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); succeeded++; } @@ -262,8 +290,28 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "SN extraction: " ); - if ( Test.testExtractSNFromNodeName() ) { + // + System.out.print( "Overlap removal: " ); + if ( !org.forester.test.Test.testOverlapRemoval() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + System.out.print( "Engulfing overlap removal: " ); + if ( !Test.testEngulfingOverlapRemoval() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + // + System.out.print( "Taxonomy code extraction: " ); + if ( Test.testExtractTaxonomyCodeFromNodeName() ) { System.out.println( "OK." ); succeeded++; } @@ -271,8 +319,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Taxonomy extraction (general): " ); - if ( Test.testTaxonomyExtraction() ) { + System.out.print( "SN extraction: " ); + if ( Test.testExtractSNFromNodeName() ) { System.out.println( "OK." ); succeeded++; } @@ -280,8 +328,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "UniProtKB id extraction: " ); - if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { + System.out.print( "Taxonomy extraction (general): " ); + if ( Test.testTaxonomyExtraction() ) { System.out.println( "OK." ); succeeded++; } @@ -460,6 +508,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Tree copy: " ); + if ( Test.testTreeCopy() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Basic tree methods: " ); if ( Test.testBasicTreeMethods() ) { System.out.println( "OK." ); @@ -814,17 +871,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "EMBL Entry Retrieval: " ); - if ( Test.testEmblEntryRetrieval() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.out.print( "Uniprot Entry Retrieval: " ); - if ( Test.testUniprotEntryRetrieval() ) { + System.out.print( "Genbank accessor parsing: " ); + if ( Test.testGenbankAccessorParsing() ) { System.out.println( "OK." ); succeeded++; } @@ -832,14 +880,27 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Uniprot Taxonomy Search: " ); - if ( Test.testUniprotTaxonomySearch() ) { - System.out.println( "OK." ); - succeeded++; + if ( PERFORM_DB_TESTS ) { + System.out.print( "Uniprot Entry Retrieval: " ); + if ( Test.testUniprotEntryRetrieval() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } } - else { - System.out.println( "failed." ); - failed++; + if ( PERFORM_DB_TESTS ) { + System.out.print( "Uniprot Taxonomy Search: " ); + if ( Test.testUniprotTaxonomySearch() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } } //---- String path = ""; @@ -851,13 +912,13 @@ public final class Test { path = "C:\\Program Files\\mafft-win\\mafft.bat"; } else { - path = "/home/czmasek/bin/mafft"; - } - if ( !MsaInferrer.isInstalled( path ) ) { path = "mafft"; - } - if ( !MsaInferrer.isInstalled( path ) ) { - path = "/usr/local/bin/mafft"; + if ( !MsaInferrer.isInstalled( path ) ) { + path = "/usr/bin/mafft"; + } + if ( !MsaInferrer.isInstalled( path ) ) { + path = "/usr/local/bin/mafft"; + } } if ( MsaInferrer.isInstalled( path ) ) { System.out.print( "MAFFT (external program): " ); @@ -888,6 +949,24 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "NHX parsing from URL: " ); + if ( Test.testNHXparsingFromURL() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "phyloXML parsing from URL: " ); + if ( Test.testPhyloXMLparsingFromURL() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.println(); final Runtime rt = java.lang.Runtime.getRuntime(); final long free_memory = rt.freeMemory() / 1000000; @@ -906,6 +985,340 @@ public final class Test { } } + public static boolean testEngulfingOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.isEngulfed( d0, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d1, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d2, covered ) ) { + return false; + } + if ( !ForesterUtil.isEngulfed( d3, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d4, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d5, covered ) ) { + return false; + } + if ( !ForesterUtil.isEngulfed( d6, covered ) ) { + return false; + } + final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Protein abc = new BasicProtein( "abc", "nemve", 0 ); + abc.addProteinDomain( a ); + abc.addProteinDomain( b ); + abc.addProteinDomain( c ); + final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc ); + final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc ); + if ( abc.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( abc_r1.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( abc_r2.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { + return false; + } + if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { + return false; + } + final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Protein def = new BasicProtein( "def", "nemve", 0 ); + def.addProteinDomain( d ); + def.addProteinDomain( e ); + def.addProteinDomain( f ); + final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def ); + final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def ); + if ( def.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( def_r1.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( def_r2.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { + return false; + } + if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { + return false; + } + if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + public static final boolean testPhyloXMLparsingFromURL() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/archaeopteryx_a/apaf_bcl2.xml"; + final URL u = new URL( s ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u.openStream(), PhyloXmlParser.createPhyloXmlParser() ); + if ( ( phys == null ) || ( phys.length != 2 ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + } + return true; + } + + public static final boolean testNHXparsingFromURL() { + try { + final String s = "https://sites.google.com/site/cmzmasek/home/software/archaeopteryx/examples/simple/simple_1.nh"; + final URL u = new URL( s ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phys = factory.create( u, new NHXParser() ); + if ( ( phys == null ) || ( phys.length != 5 ) ) { + return false; + } + if ( !phys[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys[ 0 ].toNewHampshire() ); + return false; + } + if ( !phys[ 1 ].toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + System.out.println( phys[ 1 ].toNewHampshire() ); + return false; + } + final Phylogeny[] phys2 = factory.create( u.openStream(), new NHXParser() ); + if ( ( phys2 == null ) || ( phys2.length != 5 ) ) { + return false; + } + if ( !phys2[ 0 ].toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + System.out.println( phys2[ 0 ].toNewHampshire() ); + return false; + } + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final NHXParser p = new NHXParser(); + final URL u2 = new URL( s ); + p.setSource( u2 ); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.hasNext() ) { + return false; + } + p.reset(); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + return false; + } + p.reset(); + if ( !p.hasNext() ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((((A,B),C),D),(E,F));" ) ) { + return false; + } + if ( !p.next().toNewHampshire().equals( "((1,2,3),(4,5,6),(7,8,9));" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + } + return true; + } + + public static boolean testOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) { + return false; + } + final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 1, -1 ); + final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, -1 ); + final Protein ab = new BasicProtein( "ab", "varanus", 0 ); + ab.addProteinDomain( a ); + ab.addProteinDomain( b ); + final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "b" ) ) { + return false; + } + final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s1.getNumberOfProteinDomains() != 2 ) { + return false; + } + final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain d = new BasicDomain( "d", + ( short ) 10000, + ( short ) 10500, + ( short ) 1, + ( short ) 1, + 0.0000001, + 1 ); + final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Protein cde = new BasicProtein( "cde", "varanus", 0 ); + cde.addProteinDomain( c ); + cde.addProteinDomain( d ); + cde.addProteinDomain( e ); + final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); + if ( cde.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( cde_s0.getNumberOfProteinDomains() != 3 ) { + return false; + } + final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); + final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); + final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); + fghi.addProteinDomain( f ); + fghi.addProteinDomain( g ); + fghi.addProteinDomain( h ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i2 ); + final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { + return false; + } + final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); + final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); + final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); + final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); + jklm.addProteinDomain( j ); + jklm.addProteinDomain( k ); + jklm.addProteinDomain( l ); + jklm.addProteinDomain( m ); + jklm.addProteinDomain( m0 ); + jklm.addProteinDomain( m1 ); + jklm.addProteinDomain( m2 ); + final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { + return false; + } + final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein od = new BasicProtein( "od", "varanus", 0 ); + od.addProteinDomain( only ); + final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); + if ( od.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( od_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; return p; @@ -953,7 +1366,7 @@ public final class Test { private static boolean testBasicDomain() { try { final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); - if ( !pd.getDomainId().getId().equals( "id" ) ) { + if ( !pd.getDomainId().equals( "id" ) ) { return false; } if ( pd.getNumber() != 1 ) { @@ -997,7 +1410,7 @@ public final class Test { if ( a1.compareTo( a2 ) != 0 ) { return false; } - if ( a1.compareTo( a3 ) != 0 ) { + if ( a1.compareTo( a3 ) == 0 ) { return false; } } @@ -1052,7 +1465,7 @@ public final class Test { private static boolean testBasicPhyloXMLparsing() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = new PhyloXmlParser(); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); final Phylogeny[] phylogenies_0 = factory.create( Test.PATH_TO_TEST_DATA + "phyloxml_test_t1.xml", xml_parser ); if ( xml_parser.getErrorCount() > 0 ) { @@ -1218,6 +1631,22 @@ public final class Test { if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } + final SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + if ( x.size() != 4 ) { + return false; + } + int c = 0; + for( final Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -1229,7 +1658,7 @@ public final class Test { private static boolean testBasicPhyloXMLparsingRoundtrip() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = new PhyloXmlParser(); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } @@ -1497,7 +1926,6 @@ public final class Test { } if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { - ; return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { @@ -1546,6 +1974,23 @@ public final class Test { .equalsIgnoreCase( "433" ) ) { return false; } + final SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence() + .getCrossReferences(); + if ( x.size() != 4 ) { + return false; + } + int c = 0; + for( final Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -1565,7 +2010,7 @@ public final class Test { // Do nothing -- means were not running from jar. } if ( xml_parser == null ) { - xml_parser = new PhyloXmlParser(); + xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } @@ -1856,238 +2301,78 @@ public final class Test { p.addProteinDomain( A20 ); p.addProteinDomain( B25 ); p.addProteinDomain( D80 ); - List domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); + List domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "C" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids.add( new DomainId( "X" ) ); + domains_ids.add( "X" ); if ( p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "C" ); + domains_ids.add( "D" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "D" ) ); - domains_ids.add( new DomainId( "C" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "D" ); + domains_ids.add( "C" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( !p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "B" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( !p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( !p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { - return false; - } - if ( !p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - domains_ids.add( new DomainId( "X" ) ); - if ( p.contains( domains_ids, false ) ) { - return false; - } - if ( p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "X" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( p.contains( domains_ids, false ) ) { - return false; - } - if ( p.contains( domains_ids, true ) ) { - return false; - } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "B" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "C" ); + domains_ids.add( "A" ); + domains_ids.add( "C" ); + domains_ids.add( "D" ); if ( !p.contains( domains_ids, false ) ) { return false; } @@ -2401,10 +2686,6 @@ public final class Test { private static boolean testBasicTreeMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny t1 = factory.create(); - if ( !t1.isEmpty() ) { - return false; - } final Phylogeny t2 = factory.create( "((A:1,B:2)AB:1,(C:3,D:5)CD:3)ABCD:0.5", new NHXParser() )[ 0 ]; if ( t2.getNumberOfExternalNodes() != 4 ) { return false; @@ -2618,46 +2899,56 @@ public final class Test { try { final PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { return false; } n.setName( "B0LM41_HUMAN" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { return false; } n.setName( "NP_001025424" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { return false; } n.setName( "_NM_001030253-" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { return false; } n.setName( "XM_002122186" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { return false; } n.setName( "dgh_AAA34956_gdg" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } - n.setName( "j40f4_Q06891.1_fndn2 fnr3" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "Q06891.1" ) ) { + n.setName( "AAA34956" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } n.setName( "GI:394892" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( AptxUtil.createUriForSeqWeb( n, null, null ) ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi_394892" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( AptxUtil.createUriForSeqWeb( n, null, null ) ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi6335_gi_394892_56635_Gi_43" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( AptxUtil.createUriForSeqWeb( n, null, null ) ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + return false; + } + n.setName( "P12345" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + return false; + } + n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } } @@ -3461,114 +3752,183 @@ public final class Test { return true; } - private static boolean testDomainId() { + private static boolean testEbiEntryRetrieval() { try { - final DomainId id1 = new DomainId( "a" ); - final DomainId id2 = new DomainId( "a" ); - final DomainId id3 = new DomainId( "A" ); - final DomainId id4 = new DomainId( "b" ); - if ( !id1.equals( id1 ) ) { + final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEntry( "AAK41263" ); + if ( !entry.getAccession().equals( "AAK41263" ) ) { + System.out.println( entry.getAccession() ); + return false; + } + if ( !entry.getTaxonomyScientificName().equals( "Sulfolobus solfataricus P2" ) ) { + System.out.println( entry.getTaxonomyScientificName() ); + return false; + } + if ( !entry.getSequenceName() + .equals( "Sulfolobus solfataricus P2 Glycogen debranching enzyme, hypothetical (treX-like)" ) ) { + System.out.println( entry.getSequenceName() ); + return false; + } + // if ( !entry.getSequenceSymbol().equals( "" ) ) { + // System.out.println( entry.getSequenceSymbol() ); + // return false; + // } + if ( !entry.getGeneName().equals( "treX-like" ) ) { + System.out.println( entry.getGeneName() ); + return false; + } + if ( !entry.getTaxonomyIdentifier().equals( "273057" ) ) { + System.out.println( entry.getTaxonomyIdentifier() ); + return false; + } + if ( !entry.getAnnotations().first().getRefValue().equals( "3.2.1.33" ) ) { + System.out.println( entry.getAnnotations().first().getRefValue() ); + return false; + } + if ( !entry.getAnnotations().first().getRefSource().equals( "EC" ) ) { + System.out.println( entry.getAnnotations().first().getRefSource() ); + return false; + } + if ( entry.getCrossReferences().size() != 5 ) { + return false; + } + // + final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" ); + if ( !entry1.getAccession().equals( "ABJ16409" ) ) { + return false; + } + if ( !entry1.getTaxonomyScientificName().equals( "Felis catus" ) ) { + System.out.println( entry1.getTaxonomyScientificName() ); + return false; + } + if ( !entry1.getSequenceName().equals( "Felis catus (domestic cat) partial BCL2" ) ) { + System.out.println( entry1.getSequenceName() ); + return false; + } + if ( !entry1.getTaxonomyIdentifier().equals( "9685" ) ) { + System.out.println( entry1.getTaxonomyIdentifier() ); + return false; + } + if ( !entry1.getGeneName().equals( "BCL2" ) ) { + System.out.println( entry1.getGeneName() ); + return false; + } + if ( entry1.getCrossReferences().size() != 6 ) { + return false; + } + // + final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" ); + if ( !entry2.getAccession().equals( "NM_184234" ) ) { + return false; + } + if ( !entry2.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry2.getTaxonomyScientificName() ); + return false; + } + if ( !entry2.getSequenceName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + System.out.println( entry2.getSequenceName() ); + return false; + } + if ( !entry2.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry2.getTaxonomyIdentifier() ); + return false; + } + if ( !entry2.getGeneName().equals( "RBM39" ) ) { + System.out.println( entry2.getGeneName() ); + return false; + } + if ( entry2.getCrossReferences().size() != 3 ) { + return false; + } + // + final SequenceDatabaseEntry entry3 = SequenceDbWsTools.obtainEntry( "HM043801" ); + if ( !entry3.getAccession().equals( "HM043801" ) ) { return false; } - if ( id1.getId().equals( "x" ) ) { + if ( !entry3.getTaxonomyScientificName().equals( "Bursaphelenchus xylophilus" ) ) { + System.out.println( entry3.getTaxonomyScientificName() ); return false; } - if ( id1.getId().equals( null ) ) { + if ( !entry3.getSequenceName().equals( "Bursaphelenchus xylophilus RAF gene, complete cds" ) ) { + System.out.println( entry3.getSequenceName() ); return false; } - if ( !id1.equals( id2 ) ) { + if ( !entry3.getTaxonomyIdentifier().equals( "6326" ) ) { + System.out.println( entry3.getTaxonomyIdentifier() ); return false; } - if ( id1.equals( id3 ) ) { + if ( !entry3.getSequenceSymbol().equals( "RAF" ) ) { + System.out.println( entry3.getSequenceSymbol() ); return false; } - if ( id1.hashCode() != id1.hashCode() ) { + if ( !ForesterUtil.isEmpty( entry3.getGeneName() ) ) { return false; } - if ( id1.hashCode() != id2.hashCode() ) { + if ( entry3.getCrossReferences().size() != 8 ) { return false; } - if ( id1.hashCode() == id3.hashCode() ) { + // + // + final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" ); + if ( !entry4.getAccession().equals( "AAA36557" ) ) { return false; } - if ( id1.compareTo( id1 ) != 0 ) { + if ( !entry4.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry4.getTaxonomyScientificName() ); return false; } - if ( id1.compareTo( id2 ) != 0 ) { + if ( !entry4.getSequenceName().equals( "Homo sapiens (human) ras protein" ) ) { + System.out.println( entry4.getSequenceName() ); return false; } - if ( id1.compareTo( id3 ) != 0 ) { + if ( !entry4.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry4.getTaxonomyIdentifier() ); return false; } - if ( id1.compareTo( id4 ) >= 0 ) { + if ( !entry4.getGeneName().equals( "ras" ) ) { + System.out.println( entry4.getGeneName() ); return false; } - if ( id4.compareTo( id1 ) <= 0 ) { + // if ( !entry4.getChromosome().equals( "ras" ) ) { + // System.out.println( entry4.getChromosome() ); + // return false; + // } + // if ( !entry4.getMap().equals( "ras" ) ) { + // System.out.println( entry4.getMap() ); + // return false; + // } + //TODO FIXME gi... + // + //TODO fails: + // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); + // if ( !entry5.getAccession().equals( "HM043801" ) ) { + // return false; + // } + final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "AAZ45343.1" ); + if ( !entry5.getAccession().equals( "AAZ45343" ) ) { return false; } - if ( !id4.getId().equals( "b" ) ) { + if ( !entry5.getTaxonomyScientificName().equals( "Dechloromonas aromatica RCB" ) ) { + System.out.println( entry5.getTaxonomyScientificName() ); return false; } - final DomainId id5 = new DomainId( " C " ); - if ( !id5.getId().equals( "C" ) ) { + if ( !entry5.getSequenceName().equals( "Dechloromonas aromatica RCB 1,4-alpha-glucan branching enzyme" ) ) { + System.out.println( entry5.getSequenceName() ); return false; } - if ( id5.equals( id1 ) ) { + if ( !entry5.getTaxonomyIdentifier().equals( "159087" ) ) { + System.out.println( entry5.getTaxonomyIdentifier() ); return false; } } - catch ( final Exception e ) { + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); e.printStackTrace( System.out ); - return false; - } - return true; - } - - private static boolean testEmblEntryRetrieval() { - //The format for GenBank Accession numbers are: - //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals - //Protein: 3 letters + 5 numerals - //http://www.ncbi.nlm.nih.gov/Sequin/acc.html - if ( !SequenceIdParser.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( ".AY423861.2" ).equals( "AY423861.2" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "AAY423861" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "AY4238612" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "AAY4238612" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "Y423861" ) != null ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "|S123456" ) != null ) { - return false; - } - if ( SequenceIdParser.parseGenbankAccessor( "ABC123456" ) != null ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) { - return false; - } - if ( !SequenceIdParser.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) { - return false; + return true; } - if ( SequenceIdParser.parseGenbankAccessor( "ABCD12345" ) != null ) { + catch ( final Exception e ) { + e.printStackTrace(); return false; } return true; @@ -3862,166 +4222,166 @@ public final class Test { try { PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr.B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr=B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr-B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr/B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr\\B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr_B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr|B3RJ64 " ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr|B3RJ64-" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr=B3RJ64-" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "_tr=B3RJ64_" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr_tr|B3RJ64_sp|123 " ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } - n.setName( "sp|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + n.setName( "B3RJ64" ); + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } - n.setName( "ssp|B3RJ64" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + n.setName( "sp|B3RJ64" ); + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ64C" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp B3RJ64" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ6X" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|B3RJ6" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "K1PYK7_CRAGI" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYK7_PEA" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PEA" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PEA" ) ) { return false; } n.setName( "K1PYK7_RAT" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_RAT" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_RAT" ) ) { return false; } n.setName( "K1PYK7_PIG" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "~K1PYK7_PIG~" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "123456_ECOLI-K1PYK7_CRAGI-sp" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYKX_CRAGI" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "XXXXX_CRAGI" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "XXXXX_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "XXXXX_CRAGI" ) ) { return false; } n.setName( "tr|H3IB65|H3IB65_STRPU~2-2" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "H3IB65" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "H3IB65" ) ) { return false; } n.setName( "jgi|Lacbi2|181470|Lacbi1.estExt_GeneWisePlus_human.C_10729~2-3" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|Q86U06|RBM23_HUMAN~2-2" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "Q86U06" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "Q86U06" ) ) { return false; } n = new PhylogenyNode(); org.forester.phylogeny.data.Sequence seq = new org.forester.phylogeny.data.Sequence(); seq.setSymbol( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setSymbol( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setName( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setName( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "K1PYK8_CRAGI", "?" ) ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK8_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK8_CRAGI" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "tr|B3RJ64", "?" ) ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } // n = new PhylogenyNode(); n.setName( "ACP19736" ); - if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) { + if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } n = new PhylogenyNode(); - n.setName( "_ACP19736_" ); - if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) { + n.setName( "|ACP19736|" ); + if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } } @@ -4067,6 +4427,56 @@ public final class Test { return true; } + private static boolean testGenbankAccessorParsing() { + //The format for GenBank Accession numbers are: + //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals + //Protein: 3 letters + 5 numerals + //http://www.ncbi.nlm.nih.gov/Sequin/acc.html + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( "AY423861" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) { + return false; + } + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) { + return false; + } + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) { + return false; + } + return true; + } + private static boolean testGeneralMsaParser() { try { final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n"; @@ -4590,14 +5000,15 @@ public final class Test { private static boolean testGetLCA2() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; + // final Phylogeny p_a = factory.create( "(a)", new NHXParser() )[ 0 ]; + final Phylogeny p_a = NHXParser.parse( "(a)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_a ); final PhylogenyNode p_a_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_a.getNode( "a" ), p_a.getNode( "a" ) ); if ( !p_a_1.getName().equals( "a" ) ) { return false; } - final Phylogeny p_b = factory.create( "((a)b)", new NHXParser() )[ 0 ]; + final Phylogeny p_b = NHXParser.parse( "((a)b)" )[ 0 ]; PhylogenyMethods.preOrderReId( p_b ); final PhylogenyNode p_b_1 = PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( p_b.getNode( "b" ), p_b.getNode( "a" ) ); @@ -4936,7 +5347,10 @@ public final class Test { if ( parser2.getDomainsIgnoredDueToDuf() != 0 ) { return false; } - if ( parser2.getDomainsIgnoredDueToEval() != 0 ) { + if ( parser2.getDomainsIgnoredDueToFsEval() != 0 ) { + return false; + } + if ( parser2.getDomainsIgnoredDueToIEval() != 0 ) { return false; } final Protein p1 = proteins.get( 0 ); @@ -4976,12 +5390,6 @@ public final class Test { if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerDomainScore(), 135.7 ) ) { return false; } - if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerSequenceEvalue(), 8.3e-40 ) ) { - return false; - } - if ( !Test.isEqual( p4.getProteinDomain( 0 ).getPerSequenceScore(), 136.3 ) ) { - return false; - } if ( !Test.isEqual( p4.getProteinDomain( 0 ).getNumber(), 1 ) ) { return false; } @@ -5290,10 +5698,10 @@ public final class Test { private static boolean testMsaQualityMethod() { try { - final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJ" ); - final Sequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJ" ); - final Sequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJ" ); - final Sequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ" ); + final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJJE-" ); + final Sequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJJBB" ); + final Sequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJJ--" ); + final Sequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ---" ); final List l = new ArrayList(); l.add( s0 ); l.add( s1 ); @@ -5312,6 +5720,15 @@ public final class Test { if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 3 ) ) ) { return false; } + if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 10 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 11 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 12 ) ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -7659,6 +8076,67 @@ public final class Test { if ( p.next() != null ) { return false; } + // + final String p30_str = "(A,B);(C,D)"; + final NHXParser p30 = new NHXParser(); + p30.setSource( p30_str ); + if ( !p30.hasNext() ) { + return false; + } + Phylogeny phy30 = p30.next(); + if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { + System.out.println( phy30.toNewHampshire() ); + return false; + } + if ( !p30.hasNext() ) { + return false; + } + Phylogeny phy301 = p30.next(); + if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { + System.out.println( phy301.toNewHampshire() ); + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + p30.reset(); + if ( !p30.hasNext() ) { + return false; + } + phy30 = p30.next(); + if ( !phy30.toNewHampshire().equals( "(A,B);" ) ) { + System.out.println( phy30.toNewHampshire() ); + return false; + } + if ( !p30.hasNext() ) { + return false; + } + phy301 = p30.next(); + if ( !phy301.toNewHampshire().equals( "(C,D);" ) ) { + System.out.println( phy301.toNewHampshire() ); + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.hasNext() ) { + return false; + } + if ( p30.next() != null ) { + return false; + } + if ( p30.next() != null ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -8373,7 +8851,7 @@ public final class Test { // Do nothing -- means were not running from jar. } if ( xml_parser == null ) { - xml_parser = new PhyloXmlParser(); + xml_parser = PhyloXmlParser.createPhyloXmlParser(); if ( USE_LOCAL_PHYLOXML_SCHEMA ) { xml_parser.setValidateAgainstSchema( PHYLOXML_LOCAL_XSD ); } @@ -9818,125 +10296,298 @@ public final class Test { return true; } - private static boolean testSequenceIdParsing() { + private static boolean testSequenceDbWsTools1() { try { - Identifier id = SequenceIdParser.parse( "gb_ADF31344_segmented_worms_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); - } + final PhylogenyNode n = new PhylogenyNode(); + n.setName( "NP_001025424" ); + Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { return false; } - // - id = SequenceIdParser.parse( "segmented worms|gb_ADF31344" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "NP_001025424.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "NM_001030253" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NM_001030253" ) ) { + return false; + } + n.setName( "BCL2_HUMAN" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "BCL2_HUMAN" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "P10415" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( " P10415 " ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_P10415|" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AY695820" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AY695820_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AAA59452" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AAA59452_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AAA59452.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AAA59452.1_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "GI:94894583" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "94894583" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "gi|71845847|1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() ) + || !acc.getValue().equals( "71845847" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "gi|71845847|gb|AAZ45343.1| 1,4-alpha-glucan branching enzyme [Dechloromonas aromatica RCB]" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAZ45343.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + } + catch ( final Exception e ) { + return false; + } + return true; + } + + private static boolean testSequenceDbWsTools2() { + try { + final PhylogenyNode n1 = new PhylogenyNode( "NP_001025424" ); + SequenceDbWsTools.obtainSeqInformation( n1 ); + if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { + return false; + } + if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + return false; + } + if ( !n1.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n1.getNodeData().getSequence().getAccession().getValue().equals( "NP_001025424" ) ) { + return false; + } + final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" ); + SequenceDbWsTools.obtainSeqInformation( n2 ); + if ( !n2.getNodeData().getSequence().getName() + .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) { + return false; + } + if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + return false; + } + if ( !n2.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n2.getNodeData().getSequence().getAccession().getValue().equals( "NM_001030253" ) ) { + return false; + } + final PhylogenyNode n3 = new PhylogenyNode( "NM_184234.2" ); + SequenceDbWsTools.obtainSeqInformation( n3 ); + if ( !n3.getNodeData().getSequence().getName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + return false; + } + if ( !n3.getNodeData().getTaxonomy().getScientificName().equals( "Homo sapiens" ) ) { + return false; + } + if ( !n3.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) { + return false; + } + if ( !n3.getNodeData().getSequence().getAccession().getValue().equals( "NM_184234" ) ) { + return false; + } + } + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); + e.printStackTrace( System.out ); + return true; + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + + private static boolean testSequenceIdParsing() { + try { + Accession id = SequenceAccessionTools.parseAccessorFromString( "gb_ADF31344_segmented_worms_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "segmented worms gb_ADF31344 and more" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms|gb_ADF31344" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "gb_AAA96518_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "AAA96518" ) || !id.getProvider().equals( "ncbi" ) ) { + // + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms gb_ADF31344 and more" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "gb_EHB07727_1_rodents_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "EHB07727" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "gb_AAA96518_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "AAA96518" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "dbj_BAF37827_1_turtles_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "BAF37827" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "gb_EHB07727_1_rodents_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "EHB07727" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "emb_CAA73223_1_primates_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "CAA73223" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "dbj_BAF37827_1_turtles_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "BAF37827" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "mites|ref_XP_002434188_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "emb_CAA73223_1_primates_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "CAA73223" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "mites_ref_XP_002434188_1_bla_XP_12345" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites|ref_XP_002434188_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "P4A123" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites_ref_XP_002434188_1_bla_XP_12345" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "P4A123" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "uniprot" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "XP_12345" ); + id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" ); if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); return false; } - // lcl_91970_unknown_ } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -11011,6 +11662,41 @@ public final class Test { return true; } + private static boolean testTreeCopy() { + try { + final String str_0 = "((((a,b),c),d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=animals]"; + final Phylogeny t0 = Phylogeny.createInstanceFromNhxString( str_0 ); + final Phylogeny t1 = t0.copy(); + if ( !t1.toNewHampshireX().equals( t0.toNewHampshireX() ) ) { + return false; + } + if ( !t1.toNewHampshireX().equals( str_0 ) ) { + return false; + } + t0.deleteSubtree( t0.getNode( "c" ), true ); + t0.deleteSubtree( t0.getNode( "a" ), true ); + t0.getRoot().getNodeData().getTaxonomy().setScientificName( "metazoa" ); + t0.getNode( "b" ).setName( "Bee" ); + if ( !t0.toNewHampshireX().equals( "((Bee,d)[&&NHX:S=lizards],e[&&NHX:S=reptiles])r[&&NHX:S=metazoa]" ) ) { + return false; + } + if ( !t1.toNewHampshireX().equals( str_0 ) ) { + return false; + } + t0.deleteSubtree( t0.getNode( "e" ), true ); + t0.deleteSubtree( t0.getNode( "Bee" ), true ); + t0.deleteSubtree( t0.getNode( "d" ), true ); + if ( !t1.toNewHampshireX().equals( str_0 ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + private static boolean testTreeMethods() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -11051,6 +11737,12 @@ public final class Test { if ( !entry.getSequenceName().equals( "Aspartate aminotransferase, mitochondrial" ) ) { return false; } + if ( !entry.getSequenceSymbol().equals( "mAspAT" ) ) { + return false; + } + if ( !entry.getGeneName().equals( "GOT2" ) ) { + return false; + } if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; }