X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftest%2FTest.java;h=b1b35ee5e3415dd63ba869b82c943db35c2cdd2c;hb=813491b06a1cf8364202bfc803a4dcff8b89977c;hp=0fb8f1d0a8a8dad27138a5c4a7866a2624a1cf4e;hpb=ca77b890e4cf571db9a7d01e1de5427670078d2e;p=jalview.git diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 0fb8f1d..b1b35ee 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -54,9 +54,11 @@ import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.io.writers.PhylogenyWriter; +import org.forester.msa.BasicMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; import org.forester.msa.MsaInferrer; +import org.forester.msa.MsaMethods; import org.forester.pccx.TestPccx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyBranch; @@ -81,6 +83,7 @@ import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.protein.Protein; import org.forester.sdi.SDI; import org.forester.sdi.SDIR; import org.forester.sdi.SDIse; @@ -88,7 +91,6 @@ import org.forester.sdi.TaxonomyAssigner; import org.forester.sdi.TestGSDI; import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; -import org.forester.surfacing.Protein; import org.forester.surfacing.TestSurfacing; import org.forester.tools.ConfidenceAssessor; import org.forester.tools.SupportCount; @@ -101,6 +103,7 @@ import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; import org.forester.util.GeneralTable; +import org.forester.util.SequenceIdParser; import org.forester.ws.uniprot.DatabaseTools; import org.forester.ws.uniprot.SequenceDatabaseEntry; import org.forester.ws.uniprot.UniProtTaxonomy; @@ -169,6 +172,19 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); + + + + System.out.print( "Sequence id parsing: " ); + if ( testSequenceIdParsing() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + System.exit( -1 ); //TODO FIXME remove me!! ~ + failed++; + } System.out.print( "Hmmscan output parser: " ); if ( testHmmscanOutputParser() ) { System.out.println( "OK." ); @@ -704,6 +720,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Simple MSA quality: " ); + if ( Test.testMsaQualityMethod() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } // System.out.print( "WABI TxSearch: " ); // if ( Test.testWabiTxSearch() ) { // System.out.println( "OK." ); @@ -2850,7 +2875,7 @@ public final class Test { dss3.addValue( 10 ); final AsciiHistogram histo = new AsciiHistogram( dss3 ); histo.toStringBuffer( 10, '=', 40, 5 ); - histo.toStringBuffer( 3, 8, 10, '=', 40, 5 ); + histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null ); } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -3433,10 +3458,16 @@ public final class Test { if ( p1.getNumberOfProteinDomains() != 15 ) { return false; } + if ( p1.getLength() != 850 ) { + return false; + } final Protein p2 = proteins.get( 1 ); if ( p2.getNumberOfProteinDomains() != 51 ) { return false; } + if ( p2.getLength() != 1291 ) { + return false; + } final Protein p3 = proteins.get( 2 ); if ( p3.getNumberOfProteinDomains() != 2 ) { return false; @@ -4497,6 +4528,15 @@ public final class Test { if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) { return false; } + // + final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ]; + if ( p54.getNode( "A" ) == null ) { + return false; + } + if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + .equals( "((A,B)[88],C);" ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -4706,7 +4746,7 @@ public final class Test { if ( !b.getName().equals( "n10_ECOLI1/1-2" ) ) { return false; } - if ( !PhylogenyMethods.getSpecies( b ).equals( "ECOLI" ) ) { + if ( !PhylogenyMethods.getSpecies( b ).equals( "" ) ) { return false; } final PhylogenyNode c = PhylogenyNode @@ -4715,7 +4755,25 @@ public final class Test { if ( !c.getName().equals( "n10_RATAF12/1000-2000" ) ) { return false; } - if ( !PhylogenyMethods.getSpecies( c ).equals( "RATAF" ) ) { + if ( !PhylogenyMethods.getSpecies( c ).equals( "" ) ) { + return false; + } + final PhylogenyNode c1 = PhylogenyNode + .createInstanceFromNhxString( "n10_BOVIN_1/1000-2000", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + if ( !c1.getName().equals( "n10_BOVIN_1/1000-2000" ) ) { + return false; + } + if ( !PhylogenyMethods.getSpecies( c1 ).equals( "BOVIN" ) ) { + return false; + } + final PhylogenyNode c2 = PhylogenyNode + .createInstanceFromNhxString( "n10_Bovin_1/1000-2000", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + if ( !c2.getName().equals( "n10_Bovin_1/1000-2000" ) ) { + return false; + } + if ( !PhylogenyMethods.getSpecies( c2 ).equals( "" ) ) { return false; } final PhylogenyNode d = PhylogenyNode @@ -8167,12 +8225,48 @@ public final class Test { try { final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n"; final Msa msa_0 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_0.getBytes() ) ); - final String msa_str_1 = "seq_1 abc\nseq2 ghi\nseq_1 def\nseq2 jkm\n"; + final String msa_str_1 = "seq1 abc\nseq2 ghi\nseq1 def\nseq2 jkm\n"; final Msa msa_1 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_1.getBytes() ) ); final String msa_str_2 = "seq1 abc\nseq2 ghi\n\ndef\njkm\n"; final Msa msa_2 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_2.getBytes() ) ); final String msa_str_3 = "seq1 abc\n def\nseq2 ghi\n jkm\n"; final Msa msa_3 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_3.getBytes() ) ); + if ( !msa_1.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) { + return false; + } + if ( !msa_1.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) { + return false; + } + if ( !msa_1.getIdentifier( 0 ).toString().equals( "seq1" ) ) { + return false; + } + if ( !msa_1.getIdentifier( 1 ).toString().equals( "seq2" ) ) { + return false; + } + if ( !msa_2.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) { + return false; + } + if ( !msa_2.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) { + return false; + } + if ( !msa_2.getIdentifier( 0 ).toString().equals( "seq1" ) ) { + return false; + } + if ( !msa_2.getIdentifier( 1 ).toString().equals( "seq2" ) ) { + return false; + } + if ( !msa_3.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) { + return false; + } + if ( !msa_3.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) { + return false; + } + if ( !msa_3.getIdentifier( 0 ).toString().equals( "seq1" ) ) { + return false; + } + if ( !msa_3.getIdentifier( 1 ).toString().equals( "seq2" ) ) { + return false; + } final Msa msa_4 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) ); if ( !msa_4.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) { return false; @@ -8220,8 +8314,11 @@ public final class Test { opts.add( "--quiet" ); Msa msa = null; final MsaInferrer mafft = Mafft.createInstance(); - msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi.fasta" ), opts ); - if ( ( msa == null ) || ( msa.getLength() < 10 ) || ( msa.getNumberOfSequences() != 19 ) ) { + msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi_sn.fasta" ), opts ); + if ( ( msa == null ) || ( msa.getLength() < 20 ) || ( msa.getNumberOfSequences() != 19 ) ) { + return false; + } + if ( !msa.getIdentifier( 0 ).toString().equals( "a" ) ) { return false; } } @@ -8792,4 +8889,152 @@ public final class Test { } return true; } + + private static boolean testMsaQualityMethod() { + try { + final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJ" ); + final Sequence s1 = BasicSequence.createAaSequence( "a", "ABBXEFGHIJ" ); + final Sequence s2 = BasicSequence.createAaSequence( "a", "AXCXEFGHIJ" ); + final Sequence s3 = BasicSequence.createAaSequence( "a", "AXDDEFGHIJ" ); + final List l = new ArrayList(); + l.add( s0 ); + l.add( s1 ); + l.add( s2 ); + l.add( s3 ); + final Msa msa = BasicMsa.createInstance( l ); + if ( !isEqual( 1, MsaMethods.calculateIdentityRatio( msa, 0 ) ) ) { + return false; + } + if ( !isEqual( 0.5, MsaMethods.calculateIdentityRatio( msa, 1 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 2 ) ) ) { + return false; + } + if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 3 ) ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testSequenceIdParsing() { + try { + Identifier id = SequenceIdParser.parse( "gb_ADF31344_segmented_worms_" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "ADF31344" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // + id = SequenceIdParser.parse( "segmented worms|gb_ADF31344" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "ADF31344" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // + id = SequenceIdParser.parse( "segmented worms gb_ADF31344 and more" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "ADF31344" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + + // + id = SequenceIdParser.parse( "gb_AAA96518_1" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "AAA96518" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // + id = SequenceIdParser.parse( "gb_EHB07727_1_rodents_" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "EHB07727" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // + id = SequenceIdParser.parse( "dbj_BAF37827_1_turtles_" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "BAF37827" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // + id = SequenceIdParser.parse( "emb_CAA73223_1_primates_" ); + if ( id == null + || ForesterUtil.isEmpty( id.getValue() ) + || ForesterUtil.isEmpty( id.getProvider() ) + || !id.getValue().equals( "CAA73223" ) + || !id.getProvider().equals( "genbank" ) ) { + if ( id != null ) { + System.out.println( "value =" + id.getValue() ); + System.out.println( "provider=" + id.getProvider() ); + } + return false; + } + // +// id = SequenceIdParser.parse( "mites|ref_XP_002434188_1" ); +// if ( id == null +// || ForesterUtil.isEmpty( id.getValue() ) +// || ForesterUtil.isEmpty( id.getProvider() ) +// || !id.getValue().equals( "002434188" ) +// || !id.getProvider().equals( "genbank" ) ) { +// if ( id != null ) { +// System.out.println( "value =" + id.getValue() ); +// System.out.println( "provider=" + id.getProvider() ); +// } +// return false; +// } + + // lcl_91970_unknown_ + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } }