X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftest%2FTest.java;h=ca8df7e73d527778b3411119f7f21d2d6f7a278a;hb=b80a84de8b4d07847496bc04f51b45bacd146ff3;hp=e970ca71d92f757d4b863f8a2189e3fbf00a742f;hpb=ce9e2773527fda17dcd058c8432a3bffbfadecdc;p=jalview.git diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index e970ca7..ca8df7e 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -36,9 +36,11 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import org.forester.application.support_transfer; -import org.forester.archaeopteryx.AptxUtil; +import org.forester.archaeopteryx.TreePanelUtil; import org.forester.development.DevelopmentTools; import org.forester.evoinference.TestPhylogenyReconstruction; import org.forester.evoinference.matrix.character.CharacterStateMatrix; @@ -70,6 +72,7 @@ import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Accession.Source; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; @@ -91,7 +94,6 @@ import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; -import org.forester.protein.DomainId; import org.forester.protein.Protein; import org.forester.protein.ProteinId; import org.forester.rio.TestRIO; @@ -114,7 +116,7 @@ import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.GeneralTable; -import org.forester.util.SequenceIdParser; +import org.forester.util.SequenceAccessionTools; import org.forester.ws.seqdb.SequenceDatabaseEntry; import org.forester.ws.seqdb.SequenceDbWsTools; import org.forester.ws.seqdb.UniProtTaxonomy; @@ -172,15 +174,15 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); - System.out.print( "Domain id: " ); - if ( !testDomainId() ) { - System.out.println( "failed." ); - failed++; + System.out.print( "Basic node methods: " ); + if ( Test.testBasicNodeMethods() ) { + System.out.println( "OK." ); + succeeded++; } else { - succeeded++; + System.out.println( "failed." ); + failed++; } - System.out.println( "OK." ); System.out.print( "Protein id: " ); if ( !testProteinId() ) { System.out.println( "failed." ); @@ -235,8 +237,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Hmmscan output parser: " ); - if ( testHmmscanOutputParser() ) { + System.out.print( "UniProtKB id extraction: " ); + if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { System.out.println( "OK." ); succeeded++; } @@ -244,8 +246,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Basic node methods: " ); - if ( Test.testBasicNodeMethods() ) { + System.out.print( "Sequence DB tools 1: " ); + if ( testSequenceDbWsTools1() ) { System.out.println( "OK." ); succeeded++; } @@ -253,17 +255,19 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Taxonomy code extraction: " ); - if ( Test.testExtractTaxonomyCodeFromNodeName() ) { + System.out.print( "Sequence DB tools 2: " ); + if ( testSequenceDbWsTools2() ) { System.out.println( "OK." ); succeeded++; } else { System.out.println( "failed." ); failed++; + System.exit( -1 ); } - System.out.print( "SN extraction: " ); - if ( Test.testExtractSNFromNodeName() ) { + System.exit( 0 ); + System.out.print( "Hmmscan output parser: " ); + if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); succeeded++; } @@ -271,8 +275,8 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "Taxonomy extraction (general): " ); - if ( Test.testTaxonomyExtraction() ) { + System.out.print( "Taxonomy code extraction: " ); + if ( Test.testExtractTaxonomyCodeFromNodeName() ) { System.out.println( "OK." ); succeeded++; } @@ -280,8 +284,17 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.out.print( "UniProtKB id extraction: " ); - if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { + System.out.print( "SN extraction: " ); + if ( Test.testExtractSNFromNodeName() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.out.print( "Taxonomy extraction (general): " ); + if ( Test.testTaxonomyExtraction() ) { System.out.println( "OK." ); succeeded++; } @@ -953,7 +966,7 @@ public final class Test { private static boolean testBasicDomain() { try { final Domain pd = new BasicDomain( "id", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); - if ( !pd.getDomainId().getId().equals( "id" ) ) { + if ( !pd.getDomainId().equals( "id" ) ) { return false; } if ( pd.getNumber() != 1 ) { @@ -997,7 +1010,7 @@ public final class Test { if ( a1.compareTo( a2 ) != 0 ) { return false; } - if ( a1.compareTo( a3 ) != 0 ) { + if ( a1.compareTo( a3 ) == 0 ) { return false; } } @@ -1218,6 +1231,22 @@ public final class Test { if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } + final SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + if ( x.size() != 4 ) { + return false; + } + int c = 0; + for( final Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -1497,7 +1526,6 @@ public final class Test { } if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { - ; return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { @@ -1546,6 +1574,23 @@ public final class Test { .equalsIgnoreCase( "433" ) ) { return false; } + final SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence() + .getCrossReferences(); + if ( x.size() != 4 ) { + return false; + } + int c = 0; + for( final Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -1699,264 +1744,235 @@ public final class Test { if ( !p0.toDomainArchitectureString( "~" ).equals( "a~b~c~d~e~x~y" ) ) { return false; } - // A0 A10 B15 A20 B25 A30 B35 B40 C50 A60 C70 D80 - final Domain A0 = new BasicDomain( "A", 0, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A10 = new BasicDomain( "A", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B15 = new BasicDomain( "B", 11, 16, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A20 = new BasicDomain( "A", 20, 100, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B25 = new BasicDomain( "B", 25, 26, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A30 = new BasicDomain( "A", 30, 31, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B35 = new BasicDomain( "B", 31, 40, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain B40 = new BasicDomain( "B", 40, 600, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain C50 = new BasicDomain( "C", 50, 59, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain A60 = new BasicDomain( "A", 60, 395, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain C70 = new BasicDomain( "C", 70, 71, ( short ) 1, ( short ) 4, 0.1, -12 ); - final Domain D80 = new BasicDomain( "D", 80, 81, ( short ) 1, ( short ) 4, 0.1, -12 ); - final BasicProtein p = new BasicProtein( "p", "owl", 0 ); - p.addProteinDomain( B15 ); - p.addProteinDomain( C50 ); - p.addProteinDomain( A60 ); - p.addProteinDomain( A30 ); - p.addProteinDomain( C70 ); - p.addProteinDomain( B35 ); - p.addProteinDomain( B40 ); - p.addProteinDomain( A0 ); - p.addProteinDomain( A10 ); - p.addProteinDomain( A20 ); - p.addProteinDomain( B25 ); - p.addProteinDomain( D80 ); - List domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - if ( !p.contains( domains_ids, false ) ) { + if ( !p0.toDomainArchitectureString( "~", 3, "=" ).equals( "a~b~c~d~e~x~y" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + // + final BasicProtein aa0 = new BasicProtein( "aa", "owl", 0 ); + final Domain a1 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + aa0.addProteinDomain( a1 ); + if ( !aa0.toDomainArchitectureString( "~" ).equals( "a" ) ) { return false; } - domains_ids.add( new DomainId( "X" ) ); - if ( p.contains( domains_ids, false ) ) { + if ( !aa0.toDomainArchitectureString( "~", 3, "" ).equals( "a" ) ) { return false; } - if ( p.contains( domains_ids, true ) ) { + // + final BasicProtein aa1 = new BasicProtein( "aa", "owl", 0 ); + final Domain a11 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain a12 = new BasicDomain( "a", 2, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); + aa1.addProteinDomain( a11 ); + aa1.addProteinDomain( a12 ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "a~a" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + aa1.addProteinDomain( new BasicDomain( "a", 20, 30, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "D" ) ); - domains_ids.add( new DomainId( "C" ) ); - if ( !p.contains( domains_ids, false ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { return false; } - if ( p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "a~a~a" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - if ( !p.contains( domains_ids, false ) ) { + aa1.addProteinDomain( new BasicDomain( "a", 30, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - if ( !p.contains( domains_ids, false ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - if ( !p.contains( domains_ids, false ) ) { + aa1.addProteinDomain( new BasicDomain( "b", 32, 40, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "a~a~a~a~b" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "aaa~b" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - if ( !p.contains( domains_ids, false ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "aaa~b" ) ) { return false; } - if ( p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "a~a~a~a~b" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { + aa1.addProteinDomain( new BasicDomain( "c", 1, 2, ( short ) 1, ( short ) 5, 0.1, -12 ) ); + if ( !aa1.toDomainArchitectureString( "~" ).equals( "c~a~a~a~a~b" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 3, "" ).equals( "c~aaa~b" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { + if ( !aa1.toDomainArchitectureString( "~", 4, "" ).equals( "c~aaa~b" ) ) { return false; } - if ( !p.contains( domains_ids, true ) ) { + if ( !aa1.toDomainArchitectureString( "~", 5, "" ).equals( "c~a~a~a~a~b" ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); + // + final BasicProtein p00 = new BasicProtein( "p0", "owl", 0 ); + final Domain a0 = new BasicDomain( "a", 1, 10, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain b0 = new BasicDomain( "b", 11, 20, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain c0 = new BasicDomain( "c", 9, 23, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain d0 = new BasicDomain( "d", 15, 30, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e0 = new BasicDomain( "e", 60, 70, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e1 = new BasicDomain( "e", 61, 71, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e2 = new BasicDomain( "e", 62, 72, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e3 = new BasicDomain( "e", 63, 73, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e4 = new BasicDomain( "e", 64, 74, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain e5 = new BasicDomain( "e", 65, 75, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain x0 = new BasicDomain( "x", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y0 = new BasicDomain( "y", 100, 110, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y1 = new BasicDomain( "y", 120, 130, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y2 = new BasicDomain( "y", 140, 150, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain y3 = new BasicDomain( "y", 160, 170, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain z0 = new BasicDomain( "z", 200, 210, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain z1 = new BasicDomain( "z", 300, 310, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain z2 = new BasicDomain( "z", 400, 410, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain zz0 = new BasicDomain( "Z", 500, 510, ( short ) 1, ( short ) 5, 0.1, -12 ); + final Domain zz1 = new BasicDomain( "Z", 600, 610, ( short ) 1, ( short ) 5, 0.1, -12 ); + p00.addProteinDomain( y0 ); + p00.addProteinDomain( e0 ); + p00.addProteinDomain( b0 ); + p00.addProteinDomain( c0 ); + p00.addProteinDomain( d0 ); + p00.addProteinDomain( a0 ); + p00.addProteinDomain( x0 ); + p00.addProteinDomain( y1 ); + p00.addProteinDomain( y2 ); + p00.addProteinDomain( y3 ); + p00.addProteinDomain( e1 ); + p00.addProteinDomain( e2 ); + p00.addProteinDomain( e3 ); + p00.addProteinDomain( e4 ); + p00.addProteinDomain( e5 ); + p00.addProteinDomain( z0 ); + p00.addProteinDomain( z1 ); + p00.addProteinDomain( z2 ); + p00.addProteinDomain( zz0 ); + p00.addProteinDomain( zz1 ); + if ( !p00.toDomainArchitectureString( "~", 3, "" ).equals( "a~b~c~d~eee~x~yyy~zzz~Z~Z" ) ) { + return false; + } + if ( !p00.toDomainArchitectureString( "~", 4, "" ).equals( "a~b~c~d~eee~x~yyy~z~z~z~Z~Z" ) ) { + return false; + } + if ( !p00.toDomainArchitectureString( "~", 5, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { + return false; + } + if ( !p00.toDomainArchitectureString( "~", 6, "" ).equals( "a~b~c~d~eee~x~y~y~y~y~z~z~z~Z~Z" ) ) { + return false; + } + if ( !p00.toDomainArchitectureString( "~", 7, "" ).equals( "a~b~c~d~e~e~e~e~e~e~x~y~y~y~y~z~z~z~Z~Z" ) ) { + return false; + } + // A0 A10 B15 A20 B25 A30 B35 B40 C50 A60 C70 D80 + final Domain A0 = new BasicDomain( "A", 0, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A10 = new BasicDomain( "A", 10, 11, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B15 = new BasicDomain( "B", 11, 16, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A20 = new BasicDomain( "A", 20, 100, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B25 = new BasicDomain( "B", 25, 26, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A30 = new BasicDomain( "A", 30, 31, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B35 = new BasicDomain( "B", 31, 40, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain B40 = new BasicDomain( "B", 40, 600, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain C50 = new BasicDomain( "C", 50, 59, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain A60 = new BasicDomain( "A", 60, 395, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain C70 = new BasicDomain( "C", 70, 71, ( short ) 1, ( short ) 4, 0.1, -12 ); + final Domain D80 = new BasicDomain( "D", 80, 81, ( short ) 1, ( short ) 4, 0.1, -12 ); + final BasicProtein p = new BasicProtein( "p", "owl", 0 ); + p.addProteinDomain( B15 ); + p.addProteinDomain( C50 ); + p.addProteinDomain( A60 ); + p.addProteinDomain( A30 ); + p.addProteinDomain( C70 ); + p.addProteinDomain( B35 ); + p.addProteinDomain( B40 ); + p.addProteinDomain( A0 ); + p.addProteinDomain( A10 ); + p.addProteinDomain( A20 ); + p.addProteinDomain( B25 ); + p.addProteinDomain( D80 ); + List domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "C" ); if ( !p.contains( domains_ids, false ) ) { return false; } - if ( p.contains( domains_ids, true ) ) { + if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( !p.contains( domains_ids, false ) ) { + domains_ids.add( "X" ); + if ( p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "D" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "C" ); + domains_ids.add( "D" ); if ( !p.contains( domains_ids, false ) ) { return false; } if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - domains_ids.add( new DomainId( "X" ) ); - if ( p.contains( domains_ids, false ) ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "D" ); + domains_ids.add( "C" ); + if ( !p.contains( domains_ids, false ) ) { return false; } if ( p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "X" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); - if ( p.contains( domains_ids, false ) ) { + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + if ( !p.contains( domains_ids, false ) ) { return false; } - if ( p.contains( domains_ids, true ) ) { + if ( !p.contains( domains_ids, true ) ) { return false; } - domains_ids = new ArrayList(); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "B" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "A" ) ); - domains_ids.add( new DomainId( "C" ) ); - domains_ids.add( new DomainId( "D" ) ); + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "B" ); + if ( !p.contains( domains_ids, false ) ) { + return false; + } + if ( !p.contains( domains_ids, true ) ) { + return false; + } + domains_ids = new ArrayList(); + domains_ids.add( "A" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "B" ); + domains_ids.add( "A" ); + domains_ids.add( "B" ); + domains_ids.add( "C" ); + domains_ids.add( "A" ); + domains_ids.add( "C" ); + domains_ids.add( "D" ); if ( !p.contains( domains_ids, false ) ) { return false; } @@ -2487,46 +2503,56 @@ public final class Test { try { final PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B3RJ64" ) ) { return false; } n.setName( "B0LM41_HUMAN" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "B0LM41_HUMAN" ) ) { return false; } n.setName( "NP_001025424" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "NP_001025424" ) ) { return false; } n.setName( "_NM_001030253-" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "NM_001030253" ) ) { return false; } n.setName( "XM_002122186" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_NUCCORE + "XM_002122186" ) ) { return false; } n.setName( "dgh_AAA34956_gdg" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } - n.setName( "j40f4_Q06891.1_fndn2 fnr3" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "Q06891.1" ) ) { + n.setName( "AAA34956" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) { return false; } n.setName( "GI:394892" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( AptxUtil.createUriForSeqWeb( n, null, null ) ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi_394892" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( AptxUtil.createUriForSeqWeb( n, null, null ) ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } n.setName( "gi6335_gi_394892_56635_Gi_43" ); - if ( !AptxUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { - System.out.println( AptxUtil.createUriForSeqWeb( n, null, null ) ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_GI + "394892" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + return false; + } + n.setName( "P12345" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); + return false; + } + n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" ); + if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) { + System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) ); return false; } } @@ -3330,114 +3356,51 @@ public final class Test { return true; } - private static boolean testDomainId() { - try { - final DomainId id1 = new DomainId( "a" ); - final DomainId id2 = new DomainId( "a" ); - final DomainId id3 = new DomainId( "A" ); - final DomainId id4 = new DomainId( "b" ); - if ( !id1.equals( id1 ) ) { - return false; - } - if ( id1.getId().equals( "x" ) ) { - return false; - } - if ( id1.getId().equals( null ) ) { - return false; - } - if ( !id1.equals( id2 ) ) { - return false; - } - if ( id1.equals( id3 ) ) { - return false; - } - if ( id1.hashCode() != id1.hashCode() ) { - return false; - } - if ( id1.hashCode() != id2.hashCode() ) { - return false; - } - if ( id1.hashCode() == id3.hashCode() ) { - return false; - } - if ( id1.compareTo( id1 ) != 0 ) { - return false; - } - if ( id1.compareTo( id2 ) != 0 ) { - return false; - } - if ( id1.compareTo( id3 ) != 0 ) { - return false; - } - if ( id1.compareTo( id4 ) >= 0 ) { - return false; - } - if ( id4.compareTo( id1 ) <= 0 ) { - return false; - } - if ( !id4.getId().equals( "b" ) ) { - return false; - } - final DomainId id5 = new DomainId( " C " ); - if ( !id5.getId().equals( "C" ) ) { - return false; - } - if ( id5.equals( id1 ) ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - private static boolean testEmblEntryRetrieval() { //The format for GenBank Accession numbers are: //Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals //Protein: 3 letters + 5 numerals //http://www.ncbi.nlm.nih.gov/Sequin/acc.html - if ( !SequenceIdParser.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( "AY423861" ) ) { return false; } - if ( !SequenceIdParser.parseGenbankAccessor( ".AY423861.2" ).equals( "AY423861.2" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) { return false; } - if ( !SequenceIdParser.parseGenbankAccessor( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "AAY423861" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "AY4238612" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "AAY4238612" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "Y423861" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) { return false; } - if ( !SequenceIdParser.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) { return false; } - if ( !SequenceIdParser.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "|S123456" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "ABC123456" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) { return false; } - if ( !SequenceIdParser.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) { return false; } - if ( !SequenceIdParser.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) { + if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) { return false; } - if ( SequenceIdParser.parseGenbankAccessor( "ABCD12345" ) != null ) { + if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) { return false; } return true; @@ -3731,166 +3694,166 @@ public final class Test { try { PhylogenyNode n = new PhylogenyNode(); n.setName( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr.B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr=B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr-B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr/B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr\\B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "tr_B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr|B3RJ64 " ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr|B3RJ64-" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "-tr=B3RJ64-" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "_tr=B3RJ64_" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( " tr_tr|B3RJ64_sp|123 " ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } - n.setName( "sp|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + n.setName( "B3RJ64" ); + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } - n.setName( "ssp|B3RJ64" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + n.setName( "sp|B3RJ64" ); + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ64C" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp B3RJ64" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n.setName( "sp|B3RJ6X" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|B3RJ6" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "K1PYK7_CRAGI" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYK7_PEA" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PEA" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PEA" ) ) { return false; } n.setName( "K1PYK7_RAT" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_RAT" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_RAT" ) ) { return false; } n.setName( "K1PYK7_PIG" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "~K1PYK7_PIG~" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) { return false; } n.setName( "123456_ECOLI-K1PYK7_CRAGI-sp" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } n.setName( "K1PYKX_CRAGI" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "XXXXX_CRAGI" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "XXXXX_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "XXXXX_CRAGI" ) ) { return false; } n.setName( "tr|H3IB65|H3IB65_STRPU~2-2" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "H3IB65" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "H3IB65" ) ) { return false; } n.setName( "jgi|Lacbi2|181470|Lacbi1.estExt_GeneWisePlus_human.C_10729~2-3" ); - if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) { + if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) { return false; } n.setName( "sp|Q86U06|RBM23_HUMAN~2-2" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "Q86U06" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "Q86U06" ) ) { return false; } n = new PhylogenyNode(); org.forester.phylogeny.data.Sequence seq = new org.forester.phylogeny.data.Sequence(); seq.setSymbol( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setSymbol( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setName( "K1PYK7_CRAGI" ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) { return false; } seq.setName( "tr|B3RJ64" ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "K1PYK8_CRAGI", "?" ) ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK8_CRAGI" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK8_CRAGI" ) ) { return false; } n = new PhylogenyNode(); seq = new org.forester.phylogeny.data.Sequence(); seq.setAccession( new Accession( "tr|B3RJ64", "?" ) ); n.getNodeData().addSequence( seq ); - if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) { + if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) { return false; } // n = new PhylogenyNode(); n.setName( "ACP19736" ); - if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) { + if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } n = new PhylogenyNode(); - n.setName( "_ACP19736_" ); - if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) { + n.setName( "|ACP19736|" ); + if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) { return false; } } @@ -9689,123 +9652,111 @@ public final class Test { private static boolean testSequenceIdParsing() { try { - Identifier id = SequenceIdParser.parse( "gb_ADF31344_segmented_worms_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + Accession id = SequenceAccessionTools.parseAccessorFromString( "gb_ADF31344_segmented_worms_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "segmented worms|gb_ADF31344" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms|gb_ADF31344" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "segmented worms gb_ADF31344 and more" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) { - if ( id != null ) { - System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); - } - return false; - } - // - id = SequenceIdParser.parse( "gb_AAA96518_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "AAA96518" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "segmented worms gb_ADF31344 and more" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "gb_EHB07727_1_rodents_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "EHB07727" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "gb_AAA96518_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "AAA96518" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "dbj_BAF37827_1_turtles_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "BAF37827" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "gb_EHB07727_1_rodents_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "EHB07727" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "emb_CAA73223_1_primates_" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "CAA73223" ) || !id.getProvider().equals( "ncbi" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "dbj_BAF37827_1_turtles_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "BAF37827" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "mites|ref_XP_002434188_1" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "emb_CAA73223_1_primates_" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "CAA73223" ) || !id.getSource().equals( "ncbi" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "mites_ref_XP_002434188_1_bla_XP_12345" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites|ref_XP_002434188_1" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "P4A123" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "mites_ref_XP_002434188_1_bla_XP_12345" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } // - id = SequenceIdParser.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" ); - if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() ) - || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) { + id = SequenceAccessionTools.parseAccessorFromString( "P4A123" ); + if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() ) + || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "uniprot" ) ) { if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); } return false; } - // - id = SequenceIdParser.parse( "XP_12345" ); + id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" ); if ( id != null ) { System.out.println( "value =" + id.getValue() ); - System.out.println( "provider=" + id.getProvider() ); + System.out.println( "provider=" + id.getSource() ); return false; } - // lcl_91970_unknown_ } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -10908,6 +10859,157 @@ public final class Test { return true; } + private static boolean testSequenceDbWsTools1() { + try { + PhylogenyNode n = new PhylogenyNode(); + n.setName( "NP_001025424" ); + Accession acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "NP_001025424.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NP_001025424" ) ) { + return false; + } + n.setName( "NM_001030253" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.REFSEQ.toString() ) + || !acc.getValue().equals( "NM_001030253" ) ) { + return false; + } + n.setName( "BCL2_HUMAN" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "BCL2_HUMAN" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "P10415" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( " P10415 " ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_P10415|" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.UNIPROT.toString() ) + || !acc.getValue().equals( "P10415" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AY695820" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AY695820_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AY695820" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AAA59452" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AAA59452_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "AAA59452.1" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "_AAA59452.1_" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.NCBI.toString() ) + || !acc.getValue().equals( "AAA59452.1" ) ) { + System.out.println( acc.toString() ); + return false; + } + n.setName( "GI:94894583" ); + acc = SequenceDbWsTools.obtainSeqAccession( n ); + if ( acc == null || !acc.getSource().equals( Source.GI.toString() ) || !acc.getValue().equals( "94894583" ) ) { + System.out.println( acc.toString() ); + return false; + } + } + // catch ( final IOException e ) { + // System.out.println(); + // System.out.println( "the following might be due to absence internet connection:" ); + // e.printStackTrace( System.out ); + // return true; + // } + catch ( final Exception e ) { + return false; + } + return true; + } + + private static boolean testSequenceDbWsTools2() { + try { + PhylogenyNode n1 = new PhylogenyNode(); + n1.setName( "NP_001025424" ); + SequenceDbWsTools.obtainSeqInformation( false, 4000, new TreeSet(), n1 ); + if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) { + return false; + } + if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + return false; + } + PhylogenyNode n2 = new PhylogenyNode(); + n2.setName( "NM_001030253" ); + SequenceDbWsTools.obtainSeqInformation( false, 4000, new TreeSet(), n2 ); + System.out.println( n2.toString() ); + if ( !n2.getNodeData().getSequence().getName() + .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) { + return false; + } + if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) { + return false; + } + } + catch ( final IOException e ) { + System.out.println(); + System.out.println( "the following might be due to absence internet connection:" ); + e.printStackTrace( System.out ); + return true; + } + catch ( final Exception e ) { + return false; + } + return true; + } + private static boolean testUniprotEntryRetrieval() { try { final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 200 ); @@ -10920,6 +11022,12 @@ public final class Test { if ( !entry.getSequenceName().equals( "Aspartate aminotransferase, mitochondrial" ) ) { return false; } + if ( !entry.getSequenceSymbol().equals( "mAspAT" ) ) { + return false; + } + if ( !entry.getGeneName().equals( "GOT2" ) ) { + return false; + } if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; } @@ -11029,6 +11137,84 @@ public final class Test { System.out.println( results.get( 0 ).getLineage() ); return false; } + // + results = null; + results = SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( "Xenopus tropicalis", 10 ); + if ( results.size() != 1 ) { + return false; + } + if ( !results.get( 0 ).getCode().equals( "XENTR" ) ) { + return false; + } + if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "Western clawed frog" ) ) { + return false; + } + if ( !results.get( 0 ).getId().equalsIgnoreCase( "8364" ) ) { + return false; + } + if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) { + return false; + } + if ( !results.get( 0 ).getScientificName().equals( "Xenopus tropicalis" ) ) { + return false; + } + if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 ) + .equals( "Xenopus tropicalis" ) ) { + System.out.println( results.get( 0 ).getLineage() ); + return false; + } + // + results = null; + results = SequenceDbWsTools.getTaxonomiesFromId( "8364", 10 ); + if ( results.size() != 1 ) { + return false; + } + if ( !results.get( 0 ).getCode().equals( "XENTR" ) ) { + return false; + } + if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "Western clawed frog" ) ) { + return false; + } + if ( !results.get( 0 ).getId().equalsIgnoreCase( "8364" ) ) { + return false; + } + if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) { + return false; + } + if ( !results.get( 0 ).getScientificName().equals( "Xenopus tropicalis" ) ) { + return false; + } + if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 ) + .equals( "Xenopus tropicalis" ) ) { + System.out.println( results.get( 0 ).getLineage() ); + return false; + } + // + results = null; + results = SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( "XENTR", 10 ); + if ( results.size() != 1 ) { + return false; + } + if ( !results.get( 0 ).getCode().equals( "XENTR" ) ) { + return false; + } + if ( !results.get( 0 ).getCommonName().equalsIgnoreCase( "Western clawed frog" ) ) { + return false; + } + if ( !results.get( 0 ).getId().equalsIgnoreCase( "8364" ) ) { + return false; + } + if ( !results.get( 0 ).getRank().equalsIgnoreCase( "species" ) ) { + return false; + } + if ( !results.get( 0 ).getScientificName().equals( "Xenopus tropicalis" ) ) { + return false; + } + if ( !results.get( 0 ).getLineage().get( results.get( 0 ).getLineage().size() - 1 ) + .equals( "Xenopus tropicalis" ) ) { + System.out.println( results.get( 0 ).getLineage() ); + return false; + } } catch ( final IOException e ) { System.out.println();