2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.go;
29 import java.util.ArrayList;
30 import java.util.List;
32 import java.util.SortedSet;
34 import org.forester.util.ForesterUtil;
// Tolerance used by isEqual(): differences strictly below this value count as equal.
38 private final static double ZERO_DIFF = 1.0E-9;
// Returns true when the absolute difference between a and b is strictly
// less than ZERO_DIFF, false otherwise.
40 public static boolean isEqual( final double a, final double b ) {
41 return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
// Runs the GO test sections in a fixed order. For each section a label is
// printed to System.out, followed by "OK." or "failed.".
// test_dir is passed on to the OBO parser, Pfam-to-GO parser and super-term
// tests, which use it to locate their input files.
// NOTE(review): presumably returns false at the first failing section and
// true when all sections pass -- the return statements are not visible here; confirm.
44 public static boolean test( final File test_dir ) {
// NOTE(review): unlike the sections below, no condition guarding this
// "failed." is visible -- confirm that testGoId() is invoked here.
45 System.out.print( " GO ID: " );
47 System.out.println( "failed." );
50 System.out.println( "OK." );
51 System.out.print( " Namespace: " );
52 if ( !testNamespace() ) {
53 System.out.println( "failed." );
56 System.out.println( "OK." );
57 System.out.print( " Basic GO term: " );
58 if ( !testBasicGoTerm() ) {
59 System.out.println( "failed." );
62 System.out.println( "OK." );
63 System.out.print( " OBO parser: " );
64 if ( !testOBOparser( test_dir ) ) {
65 System.out.println( "failed." );
68 System.out.println( "OK." );
69 System.out.print( " Pfam to GO mapping: " );
70 if ( !testPfamToGoMapping() ) {
71 System.out.println( "failed." );
74 System.out.println( "OK." );
75 System.out.print( " Pfam to GO parser: " );
76 if ( !testPfamToGoParser( test_dir ) ) {
77 System.out.println( "failed." );
80 System.out.println( "OK." );
81 System.out.print( " Super terms: " );
82 if ( !testSuperTermGetting( test_dir ) ) {
83 System.out.println( "failed." );
86 System.out.println( "OK." );
87 System.out.print( " Super term counting: " );
88 if ( !testSuperTermCounting( test_dir ) ) {
89 System.out.println( "failed." );
92 System.out.println( "OK." );
// Exercises equals(), hashCode() and copy() of GO terms created through the
// BasicGoTerm constructor taking id, name and namespace strings.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
96 private static boolean testBasicGoTerm() {
98 final GoTerm gt1 = new BasicGoTerm( "GO:0047579",
99 "4-hydroxymandelate oxidase activity",
100 "molecular_function",
// gt2: same id, name and namespace strings as gt1.
102 final GoTerm gt2 = new BasicGoTerm( "GO:0047579",
103 "4-hydroxymandelate oxidase activity",
104 "molecular_function",
// gt3: same id and namespace as gt1, but a different name ("?").
106 final GoTerm gt3 = new BasicGoTerm( "GO:0047579", "?", "molecular_function", true );
// gt4: same id and name as gt1, but a different namespace.
107 final GoTerm gt4 = new BasicGoTerm( "GO:0047579",
108 "4-hydroxymandelate oxidase activity",
109 "biological_process",
// gt5: same name and namespace as gt1, but a different id.
111 final GoTerm gt5 = new BasicGoTerm( "GO:0047578",
112 "4-hydroxymandelate oxidase activity",
113 "molecular_function",
115 if ( !gt1.equals( gt2 ) ) {
// A differing name alone is expected not to break equality.
118 if ( !gt1.equals( gt3 ) ) {
// A differing namespace is expected to break equality ...
121 if ( gt1.equals( gt4 ) ) {
// ... while the hash codes of gt1 and gt4 are still expected to match.
124 if ( gt1.hashCode() != gt4.hashCode() ) {
127 if ( gt1.equals( gt5 ) ) {
// A copy is expected to be equal to the term it was copied from.
130 final GoTerm gt6 = ( GoTerm ) gt5.copy();
131 if ( !gt6.equals( gt5 ) ) {
135 catch ( final Exception e ) {
136 e.printStackTrace( System.out );
// Exercises equals(), toString() and hashCode() of GoId.
// id2 and id3 are built from the same string; id1 from a different one.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
142 private static boolean testGoId() {
144 final GoId id1 = new GoId( "GO:0042617" );
145 final GoId id2 = new GoId( "GO:0042630" );
146 final GoId id3 = new GoId( "GO:0042630" );
147 if ( id1.equals( id2 ) ) {
150 if ( !id2.equals( id3 ) ) {
// toString() is expected to give back the string the id was built from.
153 if ( !id1.toString().equals( "GO:0042617" ) ) {
156 if ( id2.hashCode() != id3.hashCode() ) {
// NOTE(review): the general hashCode() contract does not require unequal
// objects to have different hash codes; this check relies on the specific
// GoId.hashCode() implementation -- confirm this is intended.
159 if ( id1.hashCode() == id2.hashCode() ) {
163 catch ( final Exception e ) {
164 e.printStackTrace( System.out );
// Exercises GoNameSpace construction from strings and from a GoNamespaceType
// constant, plus equals() and toString().
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
170 private static boolean testNamespace() {
// Inputs are deliberately given in mixed case; toString() is expected to be
// all lower case (see the checks below).
172 final GoNameSpace b = new GoNameSpace( "Biological_process" );
173 final GoNameSpace c = new GoNameSpace( "Cellular_Component" );
174 final GoNameSpace m = new GoNameSpace( "molecular_function" );
175 final GoNameSpace m2 = new GoNameSpace( GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION );
176 if ( b.equals( c ) ) {
// String-built and enum-built namespaces are expected to be equal.
179 if ( !m.equals( m2 ) ) {
182 if ( !b.toString().equals( "biological_process" ) ) {
185 if ( !c.toString().equals( "cellular_component" ) ) {
188 if ( !m.toString().equals( "molecular_function" ) ) {
192 catch ( final Exception e ) {
193 e.printStackTrace( System.out );
// Parses the file "obo_test" located in test_dir with an OBOparser configured
// for BASIC_GO_TERM and checks the resulting GO terms: term count, and for
// selected terms their comment, definition, id, namespace, relationships,
// cross references, name, super GO ids, obsolete flag and subsets.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
199 private static boolean testOBOparser( final File test_dir ) {
201 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator() + "obo_test" ),
202 OBOparser.ReturnType.BASIC_GO_TERM );
203 final List<GoTerm> go_terms = parser.parse();
204 if ( parser.getGoTermCount() != 26 ) {
// Note the naming: g3 is the third term (index 2), g2 is the last of the
// 26 expected terms (index 25).
207 final GoTerm g0 = go_terms.get( 0 );
208 final GoTerm g1 = go_terms.get( 1 );
209 final GoTerm g3 = go_terms.get( 2 );
210 final GoTerm g2 = go_terms.get( 25 );
// --- first term (g0) ---
211 if ( !g0.getComment().equals( "" ) ) {
214 if ( !g0.getDefinition()
215 .equals( "\"The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton.\" [GOC:mcc, PMID:10873824, PMID:11389764]" ) ) {
218 if ( !g0.getGoId().getId().equals( "GO:0000001" ) ) {
// NOTE(review): this compares a GoNameSpace with a GoNamespaceType constant
// (a different type), so it is presumably always false and the check can
// never fail -- confirm against GoNameSpace.equals().
221 if ( g0.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
224 if ( g0.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
227 if ( g0.getGoRelationships().size() != 0 ) {
230 if ( g0.getGoXRefs().size() != 0 ) {
233 if ( !g0.getName().equals( "mitochondrion inheritance" ) ) {
236 if ( g0.getSuperGoIds().size() != 2 ) {
// The first term is expected to be flagged obsolete in the test file.
239 if ( !g0.isObsolete() ) {
// --- second term (g1) ---
242 if ( !g1.getComment().equals( "comment" ) ) {
245 if ( !g1.getDefinition()
246 .equals( "\"The maintenance of the structure and integrity of the mitochondrial genome.\" [GOC:ai]" ) ) {
249 if ( !g1.getGoId().getId().equals( "GO:0000002" ) ) {
// NOTE(review): same GoNameSpace-vs-enum comparison as for g0 above.
252 if ( g1.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
255 if ( g1.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
258 if ( g1.getGoRelationships().size() != 1 ) {
261 if ( g1.getGoXRefs().size() != 5 ) {
264 if ( !g1.getName().equals( "mitochondrial genome maintenance" ) ) {
267 if ( g1.getSuperGoIds().size() != 1 ) {
270 if ( g1.isObsolete() ) {
// Cross references of g1: expected order, type/value split, and inequality
// when either the value or the type prefix differs.
273 if ( !g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.-" ) ) ) {
276 if ( !g1.getGoXRefs().get( 0 ).getXRef().equals( "2.4.1.-" ) ) {
279 if ( g1.getGoXRefs().get( 0 ).getType() != GoXRef.Type.EC ) {
282 if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.1" ) ) ) {
285 if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "Reactome:2.4.1.-" ) ) ) {
288 if ( !g1.getGoXRefs().get( 1 ).equals( new BasicGoXRef( "Reactome:7672" ) ) ) {
291 if ( !g1.getGoXRefs().get( 2 ).equals( new BasicGoXRef( "MetaCyc:SIROHEME-FERROCHELAT-RXN" ) ) ) {
294 if ( !g1.getGoXRefs().get( 3 ).equals( new BasicGoXRef( "RESID:AA02376" ) ) ) {
297 if ( !g1.getGoXRefs().get( 4 ).equals( new BasicGoXRef( "UM-BBD_enzymeID:e0271" ) ) ) {
// The single relationship of g1: expected target id and type.
300 if ( !g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:0007052" ) ) ) {
303 if ( !g1.getGoRelationships().get( 0 ).getGoId().equals( new GoId( "GO:0007052" ) ) ) {
306 if ( !g1.getGoRelationships().get( 0 ).getGoId().getId().equals( "GO:0007052" ) ) {
309 if ( g1.getGoRelationships().get( 0 ).getType() != GoRelationship.Type.PART_OF ) {
312 if ( g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:1007052" ) ) ) {
315 if ( !g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:0007005" ) ) ) {
318 if ( g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:1007005" ) ) ) {
// --- last term (g2) ---
321 if ( !g2.getGoId().getId().equals( "GO:0000030" ) ) {
324 if ( !g2.getGoId().equals( new GoId( "GO:0000030" ) ) ) {
327 if ( g2.getGoId().getId().equals( "GO:0000031" ) ) {
330 if ( g2.getGoId().equals( new GoId( "GO:0000031" ) ) ) {
// --- third term (g3): subset membership ---
333 if ( g3.getGoSubsets().size() != 3 ) {
336 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_generic" ) ) ) {
339 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_plant" ) ) ) {
342 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "gosubset_prok" ) ) ) {
345 if ( g3.getGoSubsets().contains( new BasicGoSubset( "goslim_candida" ) ) ) {
349 catch ( final Exception e ) {
350 e.printStackTrace( System.out );
// Exercises equals() and compareTo() of PfamToGoMapping using mappings that
// differ in key ("A"/"B"), in GO id, or in both.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
356 private static boolean testPfamToGoMapping() {
358 final PfamToGoMapping pg0 = new PfamToGoMapping( "A", new GoId( "GO:0000001" ) );
359 final PfamToGoMapping pg1 = new PfamToGoMapping( "A", new GoId( "GO:0000001" ) );
360 final PfamToGoMapping pg2 = new PfamToGoMapping( "B", new GoId( "GO:0000001" ) );
361 final PfamToGoMapping pg3 = new PfamToGoMapping( "A", new GoId( "GO:0000002" ) );
362 final PfamToGoMapping pg4 = new PfamToGoMapping( "B", new GoId( "GO:0000002" ) );
// Equality is expected only when both key and GO id match.
363 if ( !pg0.equals( pg0 ) ) {
366 if ( !pg0.equals( pg1 ) ) {
369 if ( pg0.equals( pg2 ) ) {
372 if ( pg0.equals( pg3 ) ) {
375 if ( pg0.equals( pg4 ) ) {
// pg0 and pg3 share the key but not the GO id, and compareTo() is expected
// to return 0 for them although equals() is expected to be false above.
// NOTE(review): presumably compareTo() orders by key only, which makes it
// inconsistent with equals() -- confirm this is intended.
378 if ( pg0.compareTo( pg3 ) != 0 ) {
// Key "A" is expected to sort before key "B".
381 if ( pg0.compareTo( pg2 ) >= 0 ) {
384 if ( pg2.compareTo( pg0 ) <= 0 ) {
388 catch ( final Exception e ) {
389 e.printStackTrace( System.out );
// Parses the file "pfam_to_go_test" located in test_dir with a PfamToGoParser
// and checks the number of mappings (426) as well as key and GO id of the
// first six and the last two mappings.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
395 private static boolean testPfamToGoParser( final File test_dir ) {
397 final PfamToGoParser parser = new PfamToGoParser( new File( test_dir + ForesterUtil.getFileSeparator()
398 + "pfam_to_go_test" ) );
399 final List<PfamToGoMapping> mappings = parser.parse();
// The parser's own count and the size of the returned list must agree.
400 if ( parser.getMappingCount() != 426 ) {
403 if ( mappings.size() != 426 ) {
406 final PfamToGoMapping m0 = mappings.get( 0 );
407 final PfamToGoMapping m1 = mappings.get( 1 );
408 final PfamToGoMapping m2 = mappings.get( 2 );
409 final PfamToGoMapping m3 = mappings.get( 3 );
410 final PfamToGoMapping m4 = mappings.get( 4 );
411 final PfamToGoMapping m5 = mappings.get( 5 );
412 final PfamToGoMapping m424 = mappings.get( 424 );
413 final PfamToGoMapping m425 = mappings.get( 425 );
414 if ( !m0.getKey().equals( "7tm_1" ) ) {
417 if ( !m0.getValue().equals( new GoId( "GO:0001584" ) ) ) {
// Negative checks: a wrong key or a wrong GO id must not match.
420 if ( m0.getKey().equals( "7tm_x" ) ) {
423 if ( m0.getValue().equals( new GoId( "GO:0001585" ) ) ) {
426 if ( !m1.getKey().equals( "7tm_1" ) ) {
429 if ( !m1.getValue().equals( new GoId( "GO:0007186" ) ) ) {
432 if ( !m2.getKey().equals( "7tm_1" ) ) {
435 if ( !m2.getValue().equals( new GoId( "GO:0016021" ) ) ) {
438 if ( !m3.getKey().equals( "7tm_2" ) ) {
441 if ( !m3.getValue().equals( new GoId( "GO:0004930" ) ) ) {
444 if ( !m4.getKey().equals( "7tm_2" ) ) {
447 if ( !m4.getValue().equals( new GoId( "GO:0016020" ) ) ) {
450 if ( !m5.getKey().equals( "7tm_3" ) ) {
453 if ( !m5.getValue().equals( new GoId( "GO:0008067" ) ) ) {
// Last two mappings of the file.
456 if ( !m424.getKey().equals( "OMPdecase" ) ) {
459 if ( !m424.getValue().equals( new GoId( "GO:0006207" ) ) ) {
462 if ( !m425.getKey().equals( "Bac_DNA_binding" ) ) {
465 if ( !m425.getValue().equals( new GoId( "GO:0003677" ) ) ) {
469 catch ( final Exception e ) {
470 e.printStackTrace( System.out );
// Parses "gene_ontology_edit.obo" from test_dir (27748 terms expected), builds
// a GoId-to-GoTerm map, and checks the per-category counts returned by
// GoUtils.countCategories (GoTerm based) and GoUtils.countCategoriesId
// (GoId based) for a fixed experiment set and a fixed list of categories.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
// NOTE(review): the count checks compare an Integer from Map.get() with an
// int literal, which unboxes; a missing key would therefore raise a
// NullPointerException that ends up in the catch block below.
476 private static boolean testSuperTermCounting( final File test_dir ) {
478 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
479 + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
480 final List<GoTerm> all_go_terms = parser.parse();
481 if ( parser.getGoTermCount() != 27748 ) {
484 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
485 final List<GoTerm> categories = new ArrayList<GoTerm>();
486 final List<GoTerm> experiment_set = new ArrayList<GoTerm>();
// Experiment set: terms are created with an unassigned namespace.
487 experiment_set.add( new BasicGoTerm( new GoId( "GO:0005690" ), "snRNP U4atac", GoNameSpace
488 .createUnassigned(), false ) );
489 experiment_set.add( new BasicGoTerm( new GoId( "GO:0009698" ),
490 "phenylpropanoid metabolic process",
491 GoNameSpace.createUnassigned(),
493 experiment_set.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
494 .createUnassigned(), false ) );
495 experiment_set.add( new BasicGoTerm( new GoId( "GO:0006915" ),
497 GoNameSpace.createUnassigned(),
499 experiment_set.add( new BasicGoTerm( new GoId( "GO:0001783" ), "B cell apoptosis", GoNameSpace
500 .createUnassigned(), false ) );
// GO:0010657 is added twice (here and in the next statement).
// NOTE(review): presumably deliberate, to check that duplicates are counted -- confirm.
501 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
502 .createUnassigned(), false ) );
503 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
504 .createUnassigned(), false ) );
505 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010658" ),
506 "striated muscle cell apoptosis",
507 GoNameSpace.createUnassigned(),
509 experiment_set.add( new BasicGoTerm( new GoId( "GO:0043065" ),
510 "positive regulation of apoptosis",
511 GoNameSpace.createUnassigned(),
// Categories whose counts are checked below.
// NOTE(review): the receiver of the following .add(...) is not visible here;
// presumably it is "categories" -- confirm.
514 .add( new BasicGoTerm( new GoId( "GO:0016265" ), "death", GoNameSpace.createUnassigned(), false ) );
515 categories.add( new BasicGoTerm( new GoId( "GO:0006915" ),
517 GoNameSpace.createUnassigned(),
519 categories.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
520 .createUnassigned(), false ) );
521 categories.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
522 .createUnassigned(), false ) );
523 categories.add( new BasicGoTerm( new GoId( "GO:0010658" ), "striated muscle cell apoptosis", GoNameSpace
524 .createUnassigned(), false ) );
525 categories.add( new BasicGoTerm( new GoId( "GO:0046242" ), "o-xylene biosynthetic process", GoNameSpace
526 .createUnassigned(), false ) );
527 categories.add( new BasicGoTerm( new GoId( "GO:0016326" ), "kinesin motor activity", GoNameSpace
528 .createUnassigned(), false ) );
529 categories.add( new BasicGoTerm( new GoId( "GO:0005575" ), "cellular_component", GoNameSpace
530 .createUnassigned(), false ) );
531 categories.add( new BasicGoTerm( new GoId( "GO:0032502" ), "developmental process", GoNameSpace
532 .createUnassigned(), false ) );
533 categories.add( new BasicGoTerm( new GoId( "GO:0051094" ),
534 "positive regulation of developmental process",
535 GoNameSpace.createUnassigned(),
537 categories.add( new BasicGoTerm( new GoId( "GO:0048522" ),
538 "positive regulation of cellular process",
539 GoNameSpace.createUnassigned(),
// Part 1: counting on GoTerm objects.
541 final Map<GoId, Integer> counts = GoUtils.countCategories( categories, experiment_set, goid_to_term_map );
// death
543 if ( counts.get( new GoId( "GO:0016265" ) ) != 5 ) {
547 if ( counts.get( new GoId( "GO:0006915" ) ) != 5 ) {
550 // biological_process
551 if ( counts.get( new GoId( "GO:0008150" ) ) != 8 ) {
554 // muscle cell apoptosis
555 if ( counts.get( new GoId( "GO:0010657" ) ) != 3 ) {
558 // striated muscle cell apoptosis
559 if ( counts.get( new GoId( "GO:0010658" ) ) != 1 ) {
562 // o-xylene biosynthetic process
563 if ( counts.get( new GoId( "GO:0046242" ) ) != 0 ) {
566 // kinesin motor activity
567 if ( counts.get( new GoId( "GO:0016326" ) ) != 0 ) {
570 // cellular_component
571 if ( counts.get( new GoId( "GO:0005575" ) ) != 1 ) {
574 // developmental process
575 if ( counts.get( new GoId( "GO:0032502" ) ) != 5 ) {
578 // positive regulation of developmental process
579 if ( counts.get( new GoId( "GO:0051094" ) ) != 1 ) {
582 // positive regulation of cellular process
583 if ( counts.get( new GoId( "GO:0048522" ) ) != 1 ) {
// Part 2: the same kind of counting on plain GoId lists. This experiment set
// has eight entries (GO:0043065 from part 1 is not included) and only the
// first eight categories are used, so biological_process is expected to be
// 7 here instead of 8.
586 final List<GoId> categories_id = new ArrayList<GoId>();
587 final List<GoId> experiment_set_id = new ArrayList<GoId>();
588 experiment_set_id.add( new GoId( "GO:0005690" ) );
589 experiment_set_id.add( new GoId( "GO:0009698" ) );
590 experiment_set_id.add( new GoId( "GO:0008150" ) );
591 experiment_set_id.add( new GoId( "GO:0006915" ) );
592 experiment_set_id.add( new GoId( "GO:0001783" ) );
593 experiment_set_id.add( new GoId( "GO:0010657" ) );
594 experiment_set_id.add( new GoId( "GO:0010657" ) );
595 experiment_set_id.add( new GoId( "GO:0010658" ) );
596 categories_id.add( new GoId( "GO:0016265" ) );
597 categories_id.add( new GoId( "GO:0006915" ) );
598 categories_id.add( new GoId( "GO:0008150" ) );
599 categories_id.add( new GoId( "GO:0010657" ) );
600 categories_id.add( new GoId( "GO:0010658" ) );
601 categories_id.add( new GoId( "GO:0046242" ) );
602 categories_id.add( new GoId( "GO:0016326" ) );
603 categories_id.add( new GoId( "GO:0005575" ) );
604 final Map<GoId, Integer> counts_id = GoUtils.countCategoriesId( categories_id,
// death
608 if ( counts_id.get( new GoId( "GO:0016265" ) ) != 5 ) {
612 if ( counts_id.get( new GoId( "GO:0006915" ) ) != 5 ) {
615 // biological_process
616 if ( counts_id.get( new GoId( "GO:0008150" ) ) != 7 ) {
619 // muscle cell apoptosis
620 if ( counts_id.get( new GoId( "GO:0010657" ) ) != 3 ) {
623 // striated muscle cell apoptosis
624 if ( counts_id.get( new GoId( "GO:0010658" ) ) != 1 ) {
627 // o-xylene biosynthetic process
628 if ( counts_id.get( new GoId( "GO:0046242" ) ) != 0 ) {
631 // kinesin motor activity
632 if ( counts_id.get( new GoId( "GO:0016326" ) ) != 0 ) {
// cellular_component
636 if ( counts_id.get( new GoId( "GO:0005575" ) ) != 1 ) {
640 catch ( final Exception e ) {
641 e.printStackTrace( System.out );
// Parses "gene_ontology_edit.obo" from test_dir (27748 terms expected), builds
// a GoId-to-GoTerm map, and checks the sets of super terms returned by
// GoUtils.getAllSuperGoTerms for four GO ids.
// Each condition below describes a failure case (presumably followed by
// "return false" -- confirm). Any exception is printed to System.out.
647 private static boolean testSuperTermGetting( final File test_dir ) {
649 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
650 + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
651 final List<GoTerm> go_terms = parser.parse();
652 if ( parser.getGoTermCount() != 27748 ) {
655 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
// GO:0002339: exactly two super terms expected, GO:0002376 and GO:0008150.
// NOTE(review): the remaining argument(s) of each getAllSuperGoTerms call are
// not visible here; presumably goid_to_term_map -- confirm.
656 final SortedSet<GoTerm> b_cell_selection = GoUtils.getAllSuperGoTerms( new GoId( "GO:0002339" ),
658 if ( b_cell_selection.size() != 2 ) {
661 if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0002376" ),
662 "immune system process",
663 GoNameSpace.createBiologicalProcess(),
// NOTE(review): the name used here is "biological process" (with a space);
// presumably the name does not take part in the term comparison used by
// contains() -- confirm.
667 if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0008150" ),
668 "biological process",
669 GoNameSpace.createBiologicalProcess(),
// GO:0030183: twelve super terms expected.
673 final SortedSet<GoTerm> b_cell_differentation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0030183" ),
675 if ( b_cell_differentation.size() != 12 ) {
// GO:0008150 itself is expected to have no super terms.
678 final SortedSet<GoTerm> biological_process = GoUtils.getAllSuperGoTerms( new GoId( "GO:0008150" ),
680 if ( biological_process.size() != 0 ) {
// GO:0006468: sixteen super terms expected.
683 final SortedSet<GoTerm> protein_aa_phosphorylation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0006468" ),
685 if ( protein_aa_phosphorylation.size() != 16 ) {
689 catch ( final Exception e ) {
690 e.printStackTrace( System.out );