import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Accession.Source;
import org.forester.phylogeny.data.BinaryCharacters;
import org.forester.phylogeny.data.BranchWidth;
import org.forester.phylogeny.data.Confidence;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
import org.forester.util.GeneralTable;
-import org.forester.util.SequenceIdParser;
+import org.forester.util.SequenceAccessionTools;
import org.forester.ws.seqdb.SequenceDatabaseEntry;
import org.forester.ws.seqdb.SequenceDbWsTools;
import org.forester.ws.seqdb.UniProtTaxonomy;
@SuppressWarnings( "unused")
public final class Test {
+ private final static boolean PERFORM_DB_TESTS = true;
private final static double ZERO_DIFF = 1.0E-9;
private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" )
+ ForesterUtil.getFileSeparator() + "test_data"
System.exit( -1 );
}
final long start_time = new Date().getTime();
+ System.out.print( "Basic node methods: " );
+ if ( Test.testBasicNodeMethods() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
System.out.print( "Protein id: " );
if ( !testProteinId() ) {
System.out.println( "failed." );
System.out.println( "failed." );
failed++;
}
- System.out.print( "Hmmscan output parser: " );
- if ( testHmmscanOutputParser() ) {
+ System.out.print( "UniProtKB id extraction: " );
+ if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) {
System.out.println( "OK." );
succeeded++;
}
System.out.println( "failed." );
failed++;
}
- System.out.print( "Basic node methods: " );
- if ( Test.testBasicNodeMethods() ) {
+ System.out.print( "Sequence DB tools 1: " );
+ if ( testSequenceDbWsTools1() ) {
System.out.println( "OK." );
succeeded++;
}
System.out.println( "failed." );
failed++;
}
- System.out.print( "Taxonomy code extraction: " );
- if ( Test.testExtractTaxonomyCodeFromNodeName() ) {
+ if ( PERFORM_DB_TESTS ) {
+ System.out.print( "Sequence DB tools 2: " );
+ if ( testSequenceDbWsTools2() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ System.exit( -1 );
+ }
+ }
+ // System.exit( 0 );
+ System.out.print( "Hmmscan output parser: " );
+ if ( testHmmscanOutputParser() ) {
System.out.println( "OK." );
succeeded++;
}
System.out.println( "failed." );
failed++;
}
- System.out.print( "SN extraction: " );
- if ( Test.testExtractSNFromNodeName() ) {
+ System.out.print( "Taxonomy code extraction: " );
+ if ( Test.testExtractTaxonomyCodeFromNodeName() ) {
System.out.println( "OK." );
succeeded++;
}
System.out.println( "failed." );
failed++;
}
- System.out.print( "Taxonomy extraction (general): " );
- if ( Test.testTaxonomyExtraction() ) {
+ System.out.print( "SN extraction: " );
+ if ( Test.testExtractSNFromNodeName() ) {
System.out.println( "OK." );
succeeded++;
}
System.out.println( "failed." );
failed++;
}
- System.out.print( "UniProtKB id extraction: " );
- if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) {
+ System.out.print( "Taxonomy extraction (general): " );
+ if ( Test.testTaxonomyExtraction() ) {
System.out.println( "OK." );
succeeded++;
}
System.out.println( "failed." );
failed++;
}
- System.out.print( "Uniprot Entry Retrieval: " );
- if ( Test.testUniprotEntryRetrieval() ) {
- System.out.println( "OK." );
- succeeded++;
- }
- else {
- System.out.println( "failed." );
- failed++;
- }
- System.out.print( "Uniprot Taxonomy Search: " );
- if ( Test.testUniprotTaxonomySearch() ) {
- System.out.println( "OK." );
- succeeded++;
+ if ( PERFORM_DB_TESTS ) {
+ System.out.print( "Uniprot Entry Retrieval: " );
+ if ( Test.testUniprotEntryRetrieval() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
}
- else {
- System.out.println( "failed." );
- failed++;
+ if ( PERFORM_DB_TESTS ) {
+ System.out.print( "Uniprot Taxonomy Search: " );
+ if ( Test.testUniprotTaxonomySearch() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
}
//----
String path = "";
if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) {
return false;
}
- SortedSet<Accession> x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
+ final SortedSet<Accession> x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
if ( x.size() != 4 ) {
return false;
}
int c = 0;
- for( Accession acc : x ) {
+ for( final Accession acc : x ) {
if ( c == 0 ) {
if ( !acc.getSource().equals( "KEGG" ) ) {
return false;
}
if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() )
.getLostCount() != BinaryCharacters.COUNT_DEFAULT ) {
-
return false;
}
if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) {
.equalsIgnoreCase( "433" ) ) {
return false;
}
- SortedSet<Accession> x = t3_rt.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
+ final SortedSet<Accession> x = t3_rt.getNode( "root node" ).getNodeData().getSequence()
+ .getCrossReferences();
if ( x.size() != 4 ) {
return false;
}
int c = 0;
- for( Accession acc : x ) {
+ for( final Accession acc : x ) {
if ( c == 0 ) {
if ( !acc.getSource().equals( "KEGG" ) ) {
return false;
if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) {
return false;
}
- n.setName( "j40f4_Q06891.1_fndn2 fnr3" );
- if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "Q06891.1" ) ) {
+ n.setName( "AAA34956" );
+ if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.NCBI_PROTEIN + "AAA34956" ) ) {
return false;
}
n.setName( "GI:394892" );
System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) );
return false;
}
+ n.setName( "P12345" );
+ if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) {
+ System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) );
+ return false;
+ }
+ n.setName( "gi_fdgjmn-3jk5-243 mnefmn fg023-0 P12345 4395jtmnsrg02345m1ggi92450jrg890j4t0j240" );
+ if ( !TreePanelUtil.createUriForSeqWeb( n, null, null ).equals( ForesterUtil.UNIPROT_KB + "P12345" ) ) {
+ System.out.println( TreePanelUtil.createUriForSeqWeb( n, null, null ) );
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
//Nucleotide: 1 letter + 5 numerals OR 2 letters + 6 numerals
//Protein: 3 letters + 5 numerals
//http://www.ncbi.nlm.nih.gov/Sequin/acc.html
- if ( !SequenceIdParser.parseGenbankAccessor( "AY423861" ).equals( "AY423861" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "AY423861" ).equals( "AY423861" ) ) {
return false;
}
- if ( !SequenceIdParser.parseGenbankAccessor( ".AY423861.2" ).equals( "AY423861.2" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( ".AY423861.2" ).equals( "AY423861.2" ) ) {
return false;
}
- if ( !SequenceIdParser.parseGenbankAccessor( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "345_.AY423861.24_345" ).equals( "AY423861.24" ) ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "AAY423861" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY423861" ) != null ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "AY4238612" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AY4238612" ) != null ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "AAY4238612" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "AAY4238612" ) != null ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "Y423861" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "Y423861" ) != null ) {
return false;
}
- if ( !SequenceIdParser.parseGenbankAccessor( "S12345" ).equals( "S12345" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "S12345" ).equals( "S12345" ) ) {
return false;
}
- if ( !SequenceIdParser.parseGenbankAccessor( "|S12345|" ).equals( "S12345" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "|S12345|" ).equals( "S12345" ) ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "|S123456" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "|S123456" ) != null ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "ABC123456" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABC123456" ) != null ) {
return false;
}
- if ( !SequenceIdParser.parseGenbankAccessor( "ABC12345" ).equals( "ABC12345" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "ABC12345" ).equals( "ABC12345" ) ) {
return false;
}
- if ( !SequenceIdParser.parseGenbankAccessor( "&ABC12345&" ).equals( "ABC12345" ) ) {
+ if ( !SequenceAccessionTools.parseGenbankAccessorFromString( "&ABC12345&" ).equals( "ABC12345" ) ) {
return false;
}
- if ( SequenceIdParser.parseGenbankAccessor( "ABCD12345" ) != null ) {
+ if ( SequenceAccessionTools.parseGenbankAccessorFromString( "ABCD12345" ) != null ) {
return false;
}
return true;
try {
PhylogenyNode n = new PhylogenyNode();
n.setName( "tr|B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "tr.B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "tr=B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "tr-B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "tr/B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "tr\\B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "tr_B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( " tr|B3RJ64 " );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "-tr|B3RJ64-" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "-tr=B3RJ64-" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "_tr=B3RJ64_" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( " tr_tr|B3RJ64_sp|123 " );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
- n.setName( "sp|B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ n.setName( "B3RJ64" );
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
- n.setName( "ssp|B3RJ64" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ n.setName( "sp|B3RJ64" );
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "sp|B3RJ64C" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) {
return false;
}
n.setName( "sp B3RJ64" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n.setName( "sp|B3RJ6X" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) {
return false;
}
n.setName( "sp|B3RJ6" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) {
return false;
}
n.setName( "K1PYK7_CRAGI" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) {
return false;
}
n.setName( "K1PYK7_PEA" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PEA" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PEA" ) ) {
return false;
}
n.setName( "K1PYK7_RAT" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_RAT" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_RAT" ) ) {
return false;
}
n.setName( "K1PYK7_PIG" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) {
return false;
}
n.setName( "~K1PYK7_PIG~" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_PIG" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_PIG" ) ) {
return false;
}
n.setName( "123456_ECOLI-K1PYK7_CRAGI-sp" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) {
return false;
}
n.setName( "K1PYKX_CRAGI" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) {
return false;
}
n.setName( "XXXXX_CRAGI" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "XXXXX_CRAGI" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "XXXXX_CRAGI" ) ) {
return false;
}
n.setName( "tr|H3IB65|H3IB65_STRPU~2-2" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "H3IB65" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "H3IB65" ) ) {
return false;
}
n.setName( "jgi|Lacbi2|181470|Lacbi1.estExt_GeneWisePlus_human.C_10729~2-3" );
- if ( ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ) != null ) {
+ if ( SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ) != null ) {
return false;
}
n.setName( "sp|Q86U06|RBM23_HUMAN~2-2" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "Q86U06" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "Q86U06" ) ) {
return false;
}
n = new PhylogenyNode();
org.forester.phylogeny.data.Sequence seq = new org.forester.phylogeny.data.Sequence();
seq.setSymbol( "K1PYK7_CRAGI" );
n.getNodeData().addSequence( seq );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) {
return false;
}
seq.setSymbol( "tr|B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n = new PhylogenyNode();
seq = new org.forester.phylogeny.data.Sequence();
seq.setName( "K1PYK7_CRAGI" );
n.getNodeData().addSequence( seq );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK7_CRAGI" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK7_CRAGI" ) ) {
return false;
}
seq.setName( "tr|B3RJ64" );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
n = new PhylogenyNode();
seq = new org.forester.phylogeny.data.Sequence();
seq.setAccession( new Accession( "K1PYK8_CRAGI", "?" ) );
n.getNodeData().addSequence( seq );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "K1PYK8_CRAGI" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "K1PYK8_CRAGI" ) ) {
return false;
}
n = new PhylogenyNode();
seq = new org.forester.phylogeny.data.Sequence();
seq.setAccession( new Accession( "tr|B3RJ64", "?" ) );
n.getNodeData().addSequence( seq );
- if ( !ForesterUtil.extractUniProtKbProteinSeqIdentifier( n ).equals( "B3RJ64" ) ) {
+ if ( !SequenceAccessionTools.obtainUniProtAccessorFromDataFields( n ).equals( "B3RJ64" ) ) {
return false;
}
//
n = new PhylogenyNode();
n.setName( "ACP19736" );
- if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) {
+ if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) {
return false;
}
n = new PhylogenyNode();
- n.setName( "_ACP19736_" );
- if ( !ForesterUtil.extractGenbankAccessor( n ).equals( "ACP19736" ) ) {
+ n.setName( "|ACP19736|" );
+ if ( !SequenceAccessionTools.obtainGenbankAccessorFromDataFields( n ).equals( "ACP19736" ) ) {
return false;
}
}
private static boolean testSequenceIdParsing() {
try {
- Identifier id = SequenceIdParser.parse( "gb_ADF31344_segmented_worms_" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) {
+ Accession id = SequenceAccessionTools.parseAccessorFromString( "gb_ADF31344_segmented_worms_" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "segmented worms|gb_ADF31344" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "segmented worms|gb_ADF31344" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "segmented worms gb_ADF31344 and more" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "ADF31344" ) || !id.getProvider().equals( "ncbi" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "segmented worms gb_ADF31344 and more" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "ADF31344" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "gb_AAA96518_1" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "AAA96518" ) || !id.getProvider().equals( "ncbi" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "gb_AAA96518_1" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "AAA96518" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "gb_EHB07727_1_rodents_" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "EHB07727" ) || !id.getProvider().equals( "ncbi" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "gb_EHB07727_1_rodents_" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "EHB07727" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "dbj_BAF37827_1_turtles_" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "BAF37827" ) || !id.getProvider().equals( "ncbi" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "dbj_BAF37827_1_turtles_" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "BAF37827" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "emb_CAA73223_1_primates_" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "CAA73223" ) || !id.getProvider().equals( "ncbi" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "emb_CAA73223_1_primates_" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "CAA73223" ) || !id.getSource().equals( "ncbi" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "mites|ref_XP_002434188_1" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "mites|ref_XP_002434188_1" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "mites_ref_XP_002434188_1_bla_XP_12345" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "XP_002434188" ) || !id.getProvider().equals( "refseq" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "mites_ref_XP_002434188_1_bla_XP_12345" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "XP_002434188" ) || !id.getSource().equals( "refseq" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
//
- id = SequenceIdParser.parse( "P4A123" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) {
+ id = SequenceAccessionTools.parseAccessorFromString( "P4A123" );
+ if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
+ || !id.getValue().equals( "P4A123" ) || !id.getSource().equals( "uniprot" ) ) {
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
}
return false;
}
- //
- id = SequenceIdParser.parse( "pllf[pok P4A123_osdjfosnqo035-9233332904i000490 vf tmv x45" );
- if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getProvider() )
- || !id.getValue().equals( "P4A123" ) || !id.getProvider().equals( "sp" ) ) {
- if ( id != null ) {
- System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
- }
- return false;
- }
- //
- id = SequenceIdParser.parse( "XP_12345" );
+ id = SequenceAccessionTools.parseAccessorFromString( "XP_12345" );
if ( id != null ) {
System.out.println( "value =" + id.getValue() );
- System.out.println( "provider=" + id.getProvider() );
+ System.out.println( "provider=" + id.getSource() );
return false;
}
- // lcl_91970_unknown_
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
return true;
}
+ /**
+ * Checks that SequenceDbWsTools.obtainSeqAccession( PhylogenyNode ) derives the
+ * expected accession (source and value) from a node's name for RefSeq, UniProt,
+ * NCBI and GI style identifiers, including names with surrounding noise.
+ *
+ * @return true if every lookup yields the expected source and value; false on
+ * the first mismatch or if any exception is thrown
+ */
+ private static boolean testSequenceDbWsTools1() {
+ try {
+ // A single node is reused; only its name changes between checks.
+ final PhylogenyNode n = new PhylogenyNode();
+ n.setName( "NP_001025424" );
+ Accession acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() )
+ || !acc.getValue().equals( "NP_001025424" ) ) {
+ return false;
+ }
+ n.setName( "340 0559 -- _NP_001025424_dsfdg15 05" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() )
+ || !acc.getValue().equals( "NP_001025424" ) ) {
+ return false;
+ }
+ // For this RefSeq name the expected value carries no ".1" version suffix.
+ n.setName( "NP_001025424.1" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() )
+ || !acc.getValue().equals( "NP_001025424" ) ) {
+ return false;
+ }
+ n.setName( "NM_001030253" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.REFSEQ.toString() )
+ || !acc.getValue().equals( "NM_001030253" ) ) {
+ return false;
+ }
+ // From here on a failing check prints the accession that was obtained.
+ // println( Object ) is used instead of acc.toString() because acc may be null
+ // in these branches.
+ n.setName( "BCL2_HUMAN" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() )
+ || !acc.getValue().equals( "BCL2_HUMAN" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "P10415" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() )
+ || !acc.getValue().equals( "P10415" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( " P10415 " );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() )
+ || !acc.getValue().equals( "P10415" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "_P10415|" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.UNIPROT.toString() )
+ || !acc.getValue().equals( "P10415" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "AY695820" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() )
+ || !acc.getValue().equals( "AY695820" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "_AY695820_" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() )
+ || !acc.getValue().equals( "AY695820" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "AAA59452" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() )
+ || !acc.getValue().equals( "AAA59452" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "_AAA59452_" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() )
+ || !acc.getValue().equals( "AAA59452" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ // For these NCBI names the expected value keeps the ".1" version suffix.
+ n.setName( "AAA59452.1" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() )
+ || !acc.getValue().equals( "AAA59452.1" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ n.setName( "_AAA59452.1_" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.NCBI.toString() )
+ || !acc.getValue().equals( "AAA59452.1" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ // The expected value for a "GI:" name is the bare number.
+ n.setName( "GI:94894583" );
+ acc = SequenceDbWsTools.obtainSeqAccession( n );
+ if ( ( acc == null ) || !acc.getSource().equals( Source.GI.toString() )
+ || !acc.getValue().equals( "94894583" ) ) {
+ System.out.println( acc );
+ return false;
+ }
+ }
+ catch ( final Exception e ) {
+ // Report the cause instead of failing silently, as the other tests in this class do.
+ e.printStackTrace( System.out );
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Checks that SequenceDbWsTools.obtainSeqInformation( PhylogenyNode ) fills in the
+ * sequence name, the taxonomy scientific name and the sequence accession for
+ * three nodes named after RefSeq identifiers.
+ * <p>
+ * An IOException is reported but counted as success, since it may only mean
+ * that no internet connection is available.
+ *
+ * @return true if all expectations hold or an IOException occurred; false on
+ * the first mismatch or on any other exception
+ */
+ private static boolean testSequenceDbWsTools2() {
+ try {
+ final PhylogenyNode n1 = new PhylogenyNode( "NP_001025424" );
+ SequenceDbWsTools.obtainSeqInformation( n1 );
+ if ( !n1.getNodeData().getSequence().getName().equals( "Bcl2" ) ) {
+ return false;
+ }
+ if ( !n1.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) {
+ return false;
+ }
+ if ( !n1.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) {
+ return false;
+ }
+ if ( !n1.getNodeData().getSequence().getAccession().getValue().equals( "NP_001025424" ) ) {
+ return false;
+ }
+ final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" );
+ SequenceDbWsTools.obtainSeqInformation( n2 );
+ System.out.println( n2.toString() );
+ if ( !n2.getNodeData().getSequence().getName()
+ .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) {
+ return false;
+ }
+ if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) {
+ return false;
+ }
+ if ( !n2.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) {
+ return false;
+ }
+ if ( !n2.getNodeData().getSequence().getAccession().getValue().equals( "NM_001030253" ) ) {
+ return false;
+ }
+ // The node name carries a ".2" version suffix; the expected accession value does not.
+ final PhylogenyNode n3 = new PhylogenyNode( "NM_184234.2" );
+ SequenceDbWsTools.obtainSeqInformation( n3 );
+ System.out.println( "n=" + n3.toString() );
+ if ( !n3.getNodeData().getSequence().getName()
+ .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) {
+ return false;
+ }
+ if ( !n3.getNodeData().getTaxonomy().getScientificName().equals( "Homo sapiens" ) ) {
+ return false;
+ }
+ if ( !n3.getNodeData().getSequence().getAccession().getSource().equals( Source.REFSEQ.toString() ) ) {
+ return false;
+ }
+ if ( !n3.getNodeData().getSequence().getAccession().getValue().equals( "NM_184234" ) ) {
+ return false;
+ }
+ }
+ catch ( final IOException e ) {
+ // Deliberately lenient: a missing connection must not fail the test run.
+ System.out.println();
+ System.out.println( "the following might be due to absence internet connection:" );
+ e.printStackTrace( System.out );
+ return true;
+ }
+ catch ( final Exception e ) {
+ // Print to System.out, like the IOException branch above, so the trace appears
+ // in the same stream as the "failed." report.
+ e.printStackTrace( System.out );
+ return false;
+ }
+ return true;
+ }
+
private static boolean testUniprotEntryRetrieval() {
try {
final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainUniProtEntry( "P12345", 200 );