import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.tol.TolParser;
import org.forester.io.writers.PhylogenyWriter;
+import org.forester.msa.BasicMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.MsaInferrer;
+import org.forester.msa.MsaMethods;
import org.forester.pccx.TestPccx;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyBranch;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
import org.forester.util.GeneralTable;
+import org.forester.util.SequenceIdParser;
import org.forester.ws.uniprot.DatabaseTools;
import org.forester.ws.uniprot.SequenceDatabaseEntry;
import org.forester.ws.uniprot.UniProtTaxonomy;
System.exit( -1 );
}
final long start_time = new Date().getTime();
+
+
+
+ System.out.print( "Sequence id parsing: " );
+ if ( testSequenceIdParsing() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ System.exit( -1 ); //TODO FIXME remove me!! ~
+ failed++;
+ }
System.out.print( "Hmmscan output parser: " );
if ( testHmmscanOutputParser() ) {
System.out.println( "OK." );
System.out.println( "failed." );
failed++;
}
+ System.out.print( "Simple MSA quality: " );
+ if ( Test.testMsaQualityMethod() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
// System.out.print( "WABI TxSearch: " );
// if ( Test.testWabiTxSearch() ) {
// System.out.println( "OK." );
try {
final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n";
final Msa msa_0 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_0.getBytes() ) );
- final String msa_str_1 = "seq_1 abc\nseq2 ghi\nseq_1 def\nseq2 jkm\n";
+ final String msa_str_1 = "seq1 abc\nseq2 ghi\nseq1 def\nseq2 jkm\n";
final Msa msa_1 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_1.getBytes() ) );
final String msa_str_2 = "seq1 abc\nseq2 ghi\n\ndef\njkm\n";
final Msa msa_2 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_2.getBytes() ) );
final String msa_str_3 = "seq1 abc\n def\nseq2 ghi\n jkm\n";
final Msa msa_3 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_3.getBytes() ) );
+ if ( !msa_1.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
+ return false;
+ }
+ if ( !msa_1.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) {
+ return false;
+ }
+ if ( !msa_1.getIdentifier( 0 ).toString().equals( "seq1" ) ) {
+ return false;
+ }
+ if ( !msa_1.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
+ return false;
+ }
+ if ( !msa_2.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
+ return false;
+ }
+ if ( !msa_2.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) {
+ return false;
+ }
+ if ( !msa_2.getIdentifier( 0 ).toString().equals( "seq1" ) ) {
+ return false;
+ }
+ if ( !msa_2.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
+ return false;
+ }
+ if ( !msa_3.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
+ return false;
+ }
+ if ( !msa_3.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "ghixkm" ) ) {
+ return false;
+ }
+ if ( !msa_3.getIdentifier( 0 ).toString().equals( "seq1" ) ) {
+ return false;
+ }
+ if ( !msa_3.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
+ return false;
+ }
final Msa msa_4 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) );
if ( !msa_4.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) {
return false;
Msa msa = null;
final MsaInferrer mafft = Mafft.createInstance();
msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi_sn.fasta" ), opts );
- if ( ( msa == null ) || ( msa.getLength() < 10 ) || ( msa.getNumberOfSequences() != 19 ) ) {
+ if ( ( msa == null ) || ( msa.getLength() < 20 ) || ( msa.getNumberOfSequences() != 19 ) ) {
+ return false;
+ }
+ if ( !msa.getIdentifier( 0 ).toString().equals( "a" ) ) {
return false;
}
}
}
return true;
}
+
+    /**
+     * Exercises MsaMethods.calculateIdentityRatio on a four-row alignment
+     * built in memory.
+     *
+     * Reading the residue literals column by column: column 0 is 'A' in every
+     * row, column 1 is B/B/X/X, column 2 is A/B/C/D and column 3 is X/X/X/D.
+     * The expected ratios for these columns are 1, 0.5, 0.25 and 0.75.
+     * NOTE(review): this looks like "share of the most frequent residue in the
+     * column" -- confirm against MsaMethods.
+     *
+     * @return true if all four columns yield the expected ratio; false on the
+     *         first mismatch or if any exception is thrown (the stack trace is
+     *         printed to System.out)
+     */
+    private static boolean testMsaQualityMethod() {
+        try {
+            // All rows deliberately share the same name "a".
+            final String[] rows = { "ABAXEFGHIJ", "ABBXEFGHIJ", "AXCXEFGHIJ", "AXDDEFGHIJ" };
+            final List<Sequence> seqs = new ArrayList<Sequence>();
+            for( final String residues : rows ) {
+                seqs.add( BasicSequence.createAaSequence( "a", residues ) );
+            }
+            final Msa msa = BasicMsa.createInstance( seqs );
+            // expected[ col ] is the ratio required for alignment column col.
+            final double[] expected = { 1, 0.5, 0.25, 0.75 };
+            for( int col = 0; col < expected.length; ++col ) {
+                if ( !isEqual( expected[ col ], MsaMethods.calculateIdentityRatio( msa, col ) ) ) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+    }
+
+ private static boolean testSequenceIdParsing() {
+ try {
+ Identifier id = SequenceIdParser.parse( "gb_ADF31344_segmented_worms_" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "ADF31344" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+ //
+ id = SequenceIdParser.parse( "segmented worms|gb_ADF31344" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "ADF31344" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+ //
+ id = SequenceIdParser.parse( "segmented worms gb_ADF31344 and more" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "ADF31344" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+
+ //
+ id = SequenceIdParser.parse( "gb_AAA96518_1" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "AAA96518" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+ //
+ id = SequenceIdParser.parse( "gb_EHB07727_1_rodents_" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "EHB07727" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+ //
+ id = SequenceIdParser.parse( "dbj_BAF37827_1_turtles_" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "BAF37827" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+ //
+ id = SequenceIdParser.parse( "emb_CAA73223_1_primates_" );
+ if ( id == null
+ || ForesterUtil.isEmpty( id.getValue() )
+ || ForesterUtil.isEmpty( id.getProvider() )
+ || !id.getValue().equals( "CAA73223" )
+ || !id.getProvider().equals( "genbank" ) ) {
+ if ( id != null ) {
+ System.out.println( "value =" + id.getValue() );
+ System.out.println( "provider=" + id.getProvider() );
+ }
+ return false;
+ }
+ //
+// id = SequenceIdParser.parse( "mites|ref_XP_002434188_1" );
+// if ( id == null
+// || ForesterUtil.isEmpty( id.getValue() )
+// || ForesterUtil.isEmpty( id.getProvider() )
+// || !id.getValue().equals( "002434188" )
+// || !id.getProvider().equals( "genbank" ) ) {
+// if ( id != null ) {
+// System.out.println( "value =" + id.getValue() );
+// System.out.println( "provider=" + id.getProvider() );
+// }
+// return false;
+// }
+
+ // lcl_91970_unknown_
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace( System.out );
+ return false;
+ }
+ return true;
+ }
}