phylotastic hackathon at NESCENT 120606
[jalview.git] / forester / java / src / org / forester / test / Test.java
index 0fb8f1d..b1b35ee 100644 (file)
@@ -54,9 +54,11 @@ import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
 import org.forester.io.parsers.tol.TolParser;
 import org.forester.io.writers.PhylogenyWriter;
+import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
 import org.forester.msa.MsaInferrer;
+import org.forester.msa.MsaMethods;
 import org.forester.pccx.TestPccx;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyBranch;
@@ -81,6 +83,7 @@ import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.protein.Protein;
 import org.forester.sdi.SDI;
 import org.forester.sdi.SDIR;
 import org.forester.sdi.SDIse;
@@ -88,7 +91,6 @@ import org.forester.sdi.TaxonomyAssigner;
 import org.forester.sdi.TestGSDI;
 import org.forester.sequence.BasicSequence;
 import org.forester.sequence.Sequence;
-import org.forester.surfacing.Protein;
 import org.forester.surfacing.TestSurfacing;
 import org.forester.tools.ConfidenceAssessor;
 import org.forester.tools.SupportCount;
@@ -101,6 +103,7 @@ import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
 import org.forester.util.GeneralTable;
+import org.forester.util.SequenceIdParser;
 import org.forester.ws.uniprot.DatabaseTools;
 import org.forester.ws.uniprot.SequenceDatabaseEntry;
 import org.forester.ws.uniprot.UniProtTaxonomy;
@@ -169,6 +172,19 @@ public final class Test {
             System.exit( -1 );
         }
         final long start_time = new Date().getTime();
+        
+        
+       
+        System.out.print( "Sequence id parsing: " );
+        if (  testSequenceIdParsing() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            System.exit( -1 ); //TODO FIXME remove me!! ~
+            failed++;
+        }
         System.out.print( "Hmmscan output parser: " );
         if ( testHmmscanOutputParser() ) {
             System.out.println( "OK." );
@@ -704,6 +720,15 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.out.print( "Simple MSA quality: " );
+        if ( Test.testMsaQualityMethod() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         //        System.out.print( "WABI TxSearch: " );
         //        if ( Test.testWabiTxSearch() ) {
         //            System.out.println( "OK." );
@@ -2850,7 +2875,7 @@ public final class Test {
             dss3.addValue( 10 );
             final AsciiHistogram histo = new AsciiHistogram( dss3 );
             histo.toStringBuffer( 10, '=', 40, 5 );
-            histo.toStringBuffer( 3, 8, 10, '=', 40, 5 );
+            histo.toStringBuffer( 3, 8, 10, '=', 40, 5, null );
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -3433,10 +3458,16 @@ public final class Test {
             if ( p1.getNumberOfProteinDomains() != 15 ) {
                 return false;
             }
+            if ( p1.getLength() != 850 ) {
+                return false;
+            }
             final Protein p2 = proteins.get( 1 );
             if ( p2.getNumberOfProteinDomains() != 51 ) {
                 return false;
             }
+            if ( p2.getLength() != 1291 ) {
+                return false;
+            }
             final Protein p3 = proteins.get( 2 );
             if ( p3.getNumberOfProteinDomains() != 2 ) {
                 return false;
@@ -4497,6 +4528,15 @@ public final class Test {
             if ( p53.getNode( "B (x (a' ,b) f(x);" ) == null ) {
                 return false;
             }
+            // 
+            final Phylogeny p54 = factory.create( new StringBuffer( "((A,B):[88],C)" ), new NHXParser() )[ 0 ];
+            if ( p54.getNode( "A" ) == null ) {
+                return false;
+            }
+            if ( !p54.toNewHampshire( false, NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
+                    .equals( "((A,B)[88],C);" ) ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -4706,7 +4746,7 @@ public final class Test {
                 if ( !b.getName().equals( "n10_ECOLI1/1-2" ) ) {
                     return false;
                 }
-                if ( !PhylogenyMethods.getSpecies( b ).equals( "ECOLI" ) ) {
+                if ( !PhylogenyMethods.getSpecies( b ).equals( "" ) ) {
                     return false;
                 }
                 final PhylogenyNode c = PhylogenyNode
@@ -4715,7 +4755,25 @@ public final class Test {
                 if ( !c.getName().equals( "n10_RATAF12/1000-2000" ) ) {
                     return false;
                 }
-                if ( !PhylogenyMethods.getSpecies( c ).equals( "RATAF" ) ) {
+                if ( !PhylogenyMethods.getSpecies( c ).equals( "" ) ) {
+                    return false;
+                }
+                final PhylogenyNode c1 = PhylogenyNode
+                        .createInstanceFromNhxString( "n10_BOVIN_1/1000-2000",
+                                                      PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !c1.getName().equals( "n10_BOVIN_1/1000-2000" ) ) {
+                    return false;
+                }
+                if ( !PhylogenyMethods.getSpecies( c1 ).equals( "BOVIN" ) ) {
+                    return false;
+                }
+                final PhylogenyNode c2 = PhylogenyNode
+                        .createInstanceFromNhxString( "n10_Bovin_1/1000-2000",
+                                                      PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                if ( !c2.getName().equals( "n10_Bovin_1/1000-2000" ) ) {
+                    return false;
+                }
+                if ( !PhylogenyMethods.getSpecies( c2 ).equals( "" ) ) {
                     return false;
                 }
                 final PhylogenyNode d = PhylogenyNode
@@ -8167,12 +8225,48 @@ public final class Test {
         try {
             final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n";
             final Msa msa_0 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_0.getBytes() ) );
-            final String msa_str_1 = "seq_1 abc\nseq2 ghi\nseq_1 def\nseq2 jkm\n";
+            final String msa_str_1 = "seq1 abc\nseq2 ghi\nseq1 def\nseq2 jkm\n";
             final Msa msa_1 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_1.getBytes() ) );
             final String msa_str_2 = "seq1 abc\nseq2 ghi\n\ndef\njkm\n";
             final Msa msa_2 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_2.getBytes() ) );
             final String msa_str_3 = "seq1 abc\n def\nseq2 ghi\n jkm\n";
             final Msa msa_3 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_3.getBytes() ) );
+            if ( !msa_1.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
+                return false;
+            }
+            if ( !msa_1.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) {
+                return false;
+            }
+            if ( !msa_1.getIdentifier( 0 ).toString().equals( "seq1" ) ) {
+                return false;
+            }
+            if ( !msa_1.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
+                return false;
+            }
+            if ( !msa_2.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
+                return false;
+            }
+            if ( !msa_2.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) {
+                return false;
+            }
+            if ( !msa_2.getIdentifier( 0 ).toString().equals( "seq1" ) ) {
+                return false;
+            }
+            if ( !msa_2.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
+                return false;
+            }
+            if ( !msa_3.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
+                return false;
+            }
+            if ( !msa_3.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) {
+                return false;
+            }
+            if ( !msa_3.getIdentifier( 0 ).toString().equals( "seq1" ) ) {
+                return false;
+            }
+            if ( !msa_3.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
+                return false;
+            }
             final Msa msa_4 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) );
             if ( !msa_4.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) {
                 return false;
@@ -8220,8 +8314,11 @@ public final class Test {
             opts.add( "--quiet" );
             Msa msa = null;
             final MsaInferrer mafft = Mafft.createInstance();
-            msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi.fasta" ), opts );
-            if ( ( msa == null ) || ( msa.getLength() < 10 ) || ( msa.getNumberOfSequences() != 19 ) ) {
+            msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi_sn.fasta" ), opts );
+            if ( ( msa == null ) || ( msa.getLength() < 20 ) || ( msa.getNumberOfSequences() != 19 ) ) {
+                return false;
+            }
+            if ( !msa.getIdentifier( 0 ).toString().equals( "a" ) ) {
                 return false;
             }
         }
@@ -8792,4 +8889,152 @@ public final class Test {
         }
         return true;
     }
+
+    /**
+     * Tests MsaMethods.calculateIdentityRatio() on a small four-sequence
+     * alignment by comparing the ratio returned for each of the first four
+     * columns against a fixed expected value.
+     *
+     * @return true if all four columns yield the expected ratio; false on the
+     *         first mismatch or if any exception is thrown
+     */
+    private static boolean testMsaQualityMethod() {
+        try {
+            // Rows of the alignment; they differ from each other only in columns 1 to 3.
+            final String[] rows = { "ABAXEFGHIJ", "ABBXEFGHIJ", "AXCXEFGHIJ", "AXDDEFGHIJ" };
+            final List<Sequence> seqs = new ArrayList<Sequence>();
+            for( final String row : rows ) {
+                seqs.add( BasicSequence.createAaSequence( "a", row ) );
+            }
+            final Msa msa = BasicMsa.createInstance( seqs );
+            // Expected identity ratios for columns 0, 1, 2 and 3, in that order.
+            final double[] expected_ratios = { 1, 0.5, 0.25, 0.75 };
+            for( int col = 0; col < expected_ratios.length; ++col ) {
+                final double ratio = MsaMethods.calculateIdentityRatio( msa, col );
+                if ( !isEqual( expected_ratios[ col ], ratio ) ) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+    }
+    
+    /**
+     * Tests SequenceIdParser.parse() on a series of sequence labels. Each label
+     * must yield a non-null identifier whose value equals the expected accession
+     * and whose provider equals "genbank".
+     *
+     * @return true if every label is parsed as expected; false on the first
+     *         mismatch or if any exception is thrown
+     */
+    private static boolean testSequenceIdParsing() {
+        try {
+            // Each entry: { label to parse, expected identifier value }.
+            final String[][] cases = { { "gb_ADF31344_segmented_worms_", "ADF31344" },
+                    { "segmented worms|gb_ADF31344", "ADF31344" },
+                    { "segmented worms gb_ADF31344 and more", "ADF31344" }, { "gb_AAA96518_1", "AAA96518" },
+                    { "gb_EHB07727_1_rodents_", "EHB07727" }, { "dbj_BAF37827_1_turtles_", "BAF37827" },
+                    { "emb_CAA73223_1_primates_", "CAA73223" } };
+            for( final String[] c : cases ) {
+                if ( !isSequenceIdParsedAs( c[ 0 ], c[ 1 ], "genbank" ) ) {
+                    return false;
+                }
+            }
+            // TODO: two labels were noted here but not yet tested:
+            //   "mites|ref_XP_002434188_1" (a disabled check expected value "002434188",
+            //   provider "genbank") and "lcl_91970_unknown_" (no expectation recorded).
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Parses a single label with SequenceIdParser.parse() and verifies the
+     * resulting identifier. On failure, prints the offending label and, if an
+     * identifier was returned, its value and provider to System.out.
+     *
+     * @param label the sequence label to parse
+     * @param expected_value the identifier value the parser must return
+     * @param expected_provider the provider the parser must return
+     * @return true if the parsed identifier is non-null and has the expected
+     *         value and provider; false otherwise
+     */
+    private static boolean isSequenceIdParsedAs( final String label,
+                                                 final String expected_value,
+                                                 final String expected_provider ) {
+        final Identifier id = SequenceIdParser.parse( label );
+        // The isEmpty() checks come first so that equals() is only called on
+        // a value and provider that passed them.
+        if ( ( id != null ) && !ForesterUtil.isEmpty( id.getValue() ) && !ForesterUtil.isEmpty( id.getProvider() )
+                && id.getValue().equals( expected_value ) && id.getProvider().equals( expected_provider ) ) {
+            return true;
+        }
+        // Name the input so a failure can be traced even when parse() returned null.
+        System.out.println( "input   =" + label );
+        if ( id != null ) {
+            System.out.println( "value   =" + id.getValue() );
+            System.out.println( "provider=" + id.getProvider() );
+        }
+        return false;
+    }
 }