2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.go;
29 import java.util.ArrayList;
30 import java.util.List;
32 import java.util.SortedSet;
34 import org.forester.surfacing.DomainId;
35 import org.forester.util.ForesterUtil;
// Tolerance used by isEqual: two doubles whose absolute difference is below this
// value are treated as equal.
39 private final static double ZERO_DIFF = 1.0E-9;
// Returns true when a and b differ by less than ZERO_DIFF in absolute value.
// The comparison is strict, so a difference of exactly ZERO_DIFF counts as not equal.
// A NaN difference (e.g. either argument is NaN) yields false, because any
// comparison with NaN is false.
41 public static boolean isEqual( final double a, final double b ) {
42 return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
// Runs the GO sub-tests in a fixed order: GO ID, namespace, basic GO term, OBO parser,
// Pfam to GO mapping, Pfam to GO parser, super terms, super term counting.
// A label is printed before each sub-test; "failed." is printed inside the failure
// branch and "OK." after it.
// test_dir is forwarded to the sub-tests that take it (OBO parser, Pfam to GO parser,
// super terms, super term counting).
// NOTE(review): this listing is missing lines (the embedded line numbers have gaps), so
// the return statements and closing braces are not visible. Presumably each failure
// branch returns false and the method returns true at the end -- confirm against the
// complete file.
45 public static boolean test( final File test_dir ) {
46 System.out.print( " GO ID: " );
// NOTE(review): the condition guarding this first "failed." message (embedded line 47)
// is not visible in this listing; presumably it calls testGoId() -- confirm.
48 System.out.println( "failed." );
51 System.out.println( "OK." );
52 System.out.print( " Namespace: " );
53 if ( !testNamespace() ) {
54 System.out.println( "failed." );
57 System.out.println( "OK." );
58 System.out.print( " Basic GO term: " );
59 if ( !testBasicGoTerm() ) {
60 System.out.println( "failed." );
63 System.out.println( "OK." );
64 System.out.print( " OBO parser: " );
65 if ( !testOBOparser( test_dir ) ) {
66 System.out.println( "failed." );
69 System.out.println( "OK." );
70 System.out.print( " Pfam to GO mapping: " );
71 if ( !testPfamToGoMapping() ) {
72 System.out.println( "failed." );
75 System.out.println( "OK." );
76 System.out.print( " Pfam to GO parser: " );
77 if ( !testPfamToGoParser( test_dir ) ) {
78 System.out.println( "failed." );
81 System.out.println( "OK." );
82 System.out.print( " Super terms: " );
83 if ( !testSuperTermGetting( test_dir ) ) {
84 System.out.println( "failed." );
87 System.out.println( "OK." );
88 System.out.print( " Super term counting: " );
89 if ( !testSuperTermCounting( test_dir ) ) {
90 System.out.println( "failed." );
93 System.out.println( "OK." );
// Exercises equals(), hashCode() and copy() on BasicGoTerm instances built from string
// arguments (GO id, name, namespace, plus a fourth argument).
// NOTE(review): the fourth constructor argument of gt1, gt2, gt4 and gt5, the opening
// "try {", and the bodies of the if statements are not visible in this listing.
// Presumably every if below is a failure condition that returns false and the method
// returns true otherwise -- confirm against the complete file.
97 private static boolean testBasicGoTerm() {
// gt1 and gt2: identical id, name and namespace.
99 final GoTerm gt1 = new BasicGoTerm( "GO:0047579",
100 "4-hydroxymandelate oxidase activity",
101 "molecular_function",
103 final GoTerm gt2 = new BasicGoTerm( "GO:0047579",
104 "4-hydroxymandelate oxidase activity",
105 "molecular_function",
// gt3: same id and namespace as gt1, but name "?" and fourth argument true.
107 final GoTerm gt3 = new BasicGoTerm( "GO:0047579", "?", "molecular_function", true );
// gt4: same id and name as gt1, different namespace.
108 final GoTerm gt4 = new BasicGoTerm( "GO:0047579",
109 "4-hydroxymandelate oxidase activity",
110 "biological_process",
// gt5: same name and namespace as gt1, different id.
112 final GoTerm gt5 = new BasicGoTerm( "GO:0047578",
113 "4-hydroxymandelate oxidase activity",
114 "molecular_function",
116 if ( !gt1.equals( gt2 ) ) {
// gt3 differs from gt1 in name; the guard is on "not equal", so equality presumably
// ignores the name -- TODO confirm in BasicGoTerm.equals.
119 if ( !gt1.equals( gt3 ) ) {
122 if ( gt1.equals( gt4 ) ) {
// NOTE(review): gt1 and gt4 differ in namespace, yet the guard is on differing hash
// codes -- suggests hashCode does not depend on the namespace; confirm.
125 if ( gt1.hashCode() != gt4.hashCode() ) {
128 if ( gt1.equals( gt5 ) ) {
// A copy obtained through copy() is compared with its source via equals().
131 final GoTerm gt6 = ( GoTerm ) gt5.copy();
132 if ( !gt6.equals( gt5 ) ) {
// Any exception is printed to standard output (not standard error).
136 catch ( final Exception e ) {
137 e.printStackTrace( System.out );
// Exercises equals(), toString() and hashCode() on GoId: id1 holds "GO:0042617", while
// id2 and id3 both hold "GO:0042630".
// NOTE(review): the "try {" line, the if bodies and the return statements are not
// visible in this listing; presumably each if is a failure condition returning false.
143 private static boolean testGoId() {
145 final GoId id1 = new GoId( "GO:0042617" );
146 final GoId id2 = new GoId( "GO:0042630" );
147 final GoId id3 = new GoId( "GO:0042630" );
148 if ( id1.equals( id2 ) ) {
151 if ( !id2.equals( id3 ) ) {
154 if ( !id1.toString().equals( "GO:0042617" ) ) {
157 if ( id2.hashCode() != id3.hashCode() ) {
// NOTE(review): equal hash codes for unequal objects are permitted by the general
// hashCode contract; this check is stricter than that contract.
160 if ( id1.hashCode() == id2.hashCode() ) {
// Any exception is printed to standard output (not standard error).
164 catch ( final Exception e ) {
165 e.printStackTrace( System.out );
// Exercises GoNameSpace construction from strings and from a GoNamespaceType constant,
// then compares instances with equals() and their toString() output with lower-case
// names. Two of the input strings contain upper-case letters ("Biological_process",
// "Cellular_Component") while the toString() values compared against are all lower case.
// NOTE(review): the "try {" line, the if bodies and the return statements are not
// visible in this listing; presumably each if is a failure condition returning false.
171 private static boolean testNamespace() {
173 final GoNameSpace b = new GoNameSpace( "Biological_process" );
174 final GoNameSpace c = new GoNameSpace( "Cellular_Component" );
175 final GoNameSpace m = new GoNameSpace( "molecular_function" );
176 final GoNameSpace m2 = new GoNameSpace( GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION );
177 if ( b.equals( c ) ) {
// String-built and enum-built instances are compared with each other.
180 if ( !m.equals( m2 ) ) {
183 if ( !b.toString().equals( "biological_process" ) ) {
186 if ( !c.toString().equals( "cellular_component" ) ) {
189 if ( !m.toString().equals( "molecular_function" ) ) {
// Any exception is printed to standard output (not standard error).
193 catch ( final Exception e ) {
194 e.printStackTrace( System.out );
// Parses the file "obo_test" located in test_dir with OBOparser (return type
// BASIC_GO_TERM) and checks the parsed terms against hard-coded values: term count,
// comments, ids, namespaces, relationships, cross references, names, super ids,
// the obsolete flag and subsets.
// Terms inspected: g0 = index 0, g1 = index 1, g3 = index 2, g2 = index 25 (note that
// the variable numbering does not follow the list order).
// NOTE(review): the "try {" line, the if bodies, the return statements and the
// receivers of the two ".equals(" continuation lines are not visible in this listing;
// presumably each if is a failure condition returning false -- confirm.
200 private static boolean testOBOparser( final File test_dir ) {
202 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator() + "obo_test" ),
203 OBOparser.ReturnType.BASIC_GO_TERM );
204 final List<GoTerm> go_terms = parser.parse();
205 if ( parser.getGoTermCount() != 26 ) {
208 final GoTerm g0 = go_terms.get( 0 );
209 final GoTerm g1 = go_terms.get( 1 );
210 final GoTerm g3 = go_terms.get( 2 );
211 final GoTerm g2 = go_terms.get( 25 );
// --- checks on the first term (g0) ---
212 if ( !g0.getComment().equals( "" ) ) {
// NOTE(review): the expression this ".equals(" is applied to is on a line missing from
// this listing; presumably the term's definition -- confirm.
217 .equals( "\"The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton.\" [GOC:mcc, PMID:10873824, PMID:11389764]" ) ) {
220 if ( !g0.getGoId().getId().equals( "GO:0000001" ) ) {
// The namespace object is compared with a GoNamespaceType constant (a different kind
// of value than the one getType() is compared with below); the guard is on equals()
// being true.
223 if ( g0.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
226 if ( g0.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
229 if ( g0.getGoRelationships().size() != 0 ) {
232 if ( g0.getGoXRefs().size() != 0 ) {
235 if ( !g0.getName().equals( "mitochondrion inheritance" ) ) {
238 if ( g0.getSuperGoIds().size() != 2 ) {
241 if ( !g0.isObsolete() ) {
// --- checks on the second term (g1) ---
244 if ( !g1.getComment().equals( "comment" ) ) {
// NOTE(review): receiver of this ".equals(" is likewise on a missing line.
249 .equals( "\"The maintenance of the structure and integrity of the mitochondrial genome.\" [GOC:ai]" ) ) {
252 if ( !g1.getGoId().getId().equals( "GO:0000002" ) ) {
255 if ( g1.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
258 if ( g1.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
261 if ( g1.getGoRelationships().size() != 1 ) {
264 if ( g1.getGoXRefs().size() != 5 ) {
267 if ( !g1.getName().equals( "mitochondrial genome maintenance" ) ) {
270 if ( g1.getSuperGoIds().size() != 1 ) {
273 if ( g1.isObsolete() ) {
// Cross references of g1: the first one is checked by value, by getXRef(), by type,
// and against two near-miss values; the remaining four are checked by value only.
276 if ( !g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.-" ) ) ) {
279 if ( !g1.getGoXRefs().get( 0 ).getXRef().equals( "2.4.1.-" ) ) {
282 if ( g1.getGoXRefs().get( 0 ).getType() != GoXRef.Type.EC ) {
285 if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.1" ) ) ) {
288 if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "Reactome:2.4.1.-" ) ) ) {
291 if ( !g1.getGoXRefs().get( 1 ).equals( new BasicGoXRef( "Reactome:7672" ) ) ) {
294 if ( !g1.getGoXRefs().get( 2 ).equals( new BasicGoXRef( "MetaCyc:SIROHEME-FERROCHELAT-RXN" ) ) ) {
297 if ( !g1.getGoXRefs().get( 3 ).equals( new BasicGoXRef( "RESID:AA02376" ) ) ) {
300 if ( !g1.getGoXRefs().get( 4 ).equals( new BasicGoXRef( "UM-BBD_enzymeID:e0271" ) ) ) {
// The single relationship of g1: value, target id, type, and one near-miss value.
303 if ( !g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:0007052" ) ) ) {
306 if ( !g1.getGoRelationships().get( 0 ).getGoId().equals( new GoId( "GO:0007052" ) ) ) {
309 if ( !g1.getGoRelationships().get( 0 ).getGoId().getId().equals( "GO:0007052" ) ) {
312 if ( g1.getGoRelationships().get( 0 ).getType() != GoRelationship.Type.PART_OF ) {
315 if ( g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:1007052" ) ) ) {
318 if ( !g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:0007005" ) ) ) {
321 if ( g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:1007005" ) ) ) {
// --- checks on the term at index 25 (g2): id by string and by GoId, plus near misses ---
324 if ( !g2.getGoId().getId().equals( "GO:0000030" ) ) {
327 if ( !g2.getGoId().equals( new GoId( "GO:0000030" ) ) ) {
330 if ( g2.getGoId().getId().equals( "GO:0000031" ) ) {
333 if ( g2.getGoId().equals( new GoId( "GO:0000031" ) ) ) {
// --- checks on the term at index 2 (g3): subset membership ---
336 if ( g3.getGoSubsets().size() != 3 ) {
339 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_generic" ) ) ) {
342 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_plant" ) ) ) {
345 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "gosubset_prok" ) ) ) {
348 if ( g3.getGoSubsets().contains( new BasicGoSubset( "goslim_candida" ) ) ) {
// Any exception is printed to standard output (not standard error).
352 catch ( final Exception e ) {
353 e.printStackTrace( System.out );
// Exercises equals() and compareTo() on PfamToGoMapping using the four combinations of
// domain ids "A"/"B" and GO ids "GO:0000001"/"GO:0000002" (pg0 and pg1 are the same
// combination).
// NOTE(review): the "try {" line, the if bodies and the return statements are not
// visible in this listing; presumably each if is a failure condition returning false.
359 private static boolean testPfamToGoMapping() {
361 final PfamToGoMapping pg0 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000001" ) );
362 final PfamToGoMapping pg1 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000001" ) );
363 final PfamToGoMapping pg2 = new PfamToGoMapping( new DomainId( "B" ), new GoId( "GO:0000001" ) );
364 final PfamToGoMapping pg3 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000002" ) );
365 final PfamToGoMapping pg4 = new PfamToGoMapping( new DomainId( "B" ), new GoId( "GO:0000002" ) );
// Reflexivity: an instance compared with itself.
366 if ( !pg0.equals( pg0 ) ) {
369 if ( !pg0.equals( pg1 ) ) {
372 if ( pg0.equals( pg2 ) ) {
375 if ( pg0.equals( pg3 ) ) {
378 if ( pg0.equals( pg4 ) ) {
// NOTE(review): pg0 and pg3 share the domain id but differ in GO id. The guard here is
// on compareTo() being non-zero while the guard above is on equals() being true, which
// suggests compareTo orders by domain id only and is not consistent with equals --
// confirm in PfamToGoMapping.
381 if ( pg0.compareTo( pg3 ) != 0 ) {
// Ordering between domain ids "A" (pg0) and "B" (pg2), checked in both directions.
384 if ( pg0.compareTo( pg2 ) >= 0 ) {
387 if ( pg2.compareTo( pg0 ) <= 0 ) {
// Any exception is printed to standard output (not standard error).
391 catch ( final Exception e ) {
392 e.printStackTrace( System.out );
// Parses the file "pfam_to_go_test" located in test_dir with PfamToGoParser and checks
// the mapping count (426, both from the parser and from the returned list) and the
// key/value of the mappings at indices 0-5, 424 and 425 against hard-coded domain ids
// and GO ids.
// NOTE(review): the "try {" line, the if bodies and the return statements are not
// visible in this listing; presumably each if is a failure condition returning false.
398 private static boolean testPfamToGoParser( final File test_dir ) {
400 final PfamToGoParser parser = new PfamToGoParser( new File( test_dir + ForesterUtil.getFileSeparator()
401 + "pfam_to_go_test" ) );
402 final List<PfamToGoMapping> mappings = parser.parse();
403 if ( parser.getMappingCount() != 426 ) {
406 if ( mappings.size() != 426 ) {
// First six mappings and the last two (indices 424 and 425 of 426).
409 final PfamToGoMapping m0 = mappings.get( 0 );
410 final PfamToGoMapping m1 = mappings.get( 1 );
411 final PfamToGoMapping m2 = mappings.get( 2 );
412 final PfamToGoMapping m3 = mappings.get( 3 );
413 final PfamToGoMapping m4 = mappings.get( 4 );
414 final PfamToGoMapping m5 = mappings.get( 5 );
415 final PfamToGoMapping m424 = mappings.get( 424 );
416 final PfamToGoMapping m425 = mappings.get( 425 );
// m0 is checked both for the expected key/value and against near-miss values.
417 if ( !m0.getKey().equals( new DomainId( "7tm_1" ) ) ) {
420 if ( !m0.getValue().equals( new GoId( "GO:0001584" ) ) ) {
423 if ( m0.getKey().equals( new DomainId( "7tm_x" ) ) ) {
426 if ( m0.getValue().equals( new GoId( "GO:0001585" ) ) ) {
429 if ( !m1.getKey().equals( new DomainId( "7tm_1" ) ) ) {
432 if ( !m1.getValue().equals( new GoId( "GO:0007186" ) ) ) {
435 if ( !m2.getKey().equals( new DomainId( "7tm_1" ) ) ) {
438 if ( !m2.getValue().equals( new GoId( "GO:0016021" ) ) ) {
441 if ( !m3.getKey().equals( new DomainId( "7tm_2" ) ) ) {
444 if ( !m3.getValue().equals( new GoId( "GO:0004930" ) ) ) {
447 if ( !m4.getKey().equals( new DomainId( "7tm_2" ) ) ) {
450 if ( !m4.getValue().equals( new GoId( "GO:0016020" ) ) ) {
453 if ( !m5.getKey().equals( new DomainId( "7tm_3" ) ) ) {
456 if ( !m5.getValue().equals( new GoId( "GO:0008067" ) ) ) {
459 if ( !m424.getKey().equals( new DomainId( "OMPdecase" ) ) ) {
462 if ( !m424.getValue().equals( new GoId( "GO:0006207" ) ) ) {
465 if ( !m425.getKey().equals( new DomainId( "Bac_DNA_binding" ) ) ) {
468 if ( !m425.getValue().equals( new GoId( "GO:0003677" ) ) ) {
// Any exception is printed to standard output (not standard error).
472 catch ( final Exception e ) {
473 e.printStackTrace( System.out );
// Parses "gene_ontology_edit.obo" from test_dir (term count compared with 27748), builds
// a GoId-to-GoTerm map, and then checks category counting twice:
//   1. GoUtils.countCategories with lists of GoTerm objects, and
//   2. GoUtils.countCategoriesId with lists of GoId objects.
// In both cases the per-category results are compared with hard-coded numbers.
// The experiment lists contain GO:0010657 twice. The id-based experiment list has no
// entry for GO:0043065, which the term-based list does contain; the expected count for
// GO:0008150 is 8 in the first part and 7 in the second.
// NOTE(review): this listing is missing lines (gaps in the embedded numbering): the
// "try {" line, some constructor arguments, the receiver of one ".add(" call, the
// remaining arguments of countCategoriesId, the if bodies and the return statements.
// Presumably each if is a failure condition returning false -- confirm.
// NOTE(review): the comparisons unbox the Integer returned by get(); assuming Map is
// java.util.Map, a missing key would yield null and throw a NullPointerException, which
// would end up in the catch block at the bottom.
479 private static boolean testSuperTermCounting( final File test_dir ) {
481 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
482 + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
483 final List<GoTerm> all_go_terms = parser.parse();
484 if ( parser.getGoTermCount() != 27748 ) {
487 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
// --- part 1: term-based lists ---
488 final List<GoTerm> categories = new ArrayList<GoTerm>();
489 final List<GoTerm> experiment_set = new ArrayList<GoTerm>();
490 experiment_set.add( new BasicGoTerm( new GoId( "GO:0005690" ), "snRNP U4atac", GoNameSpace
491 .createUnassigned(), false ) );
492 experiment_set.add( new BasicGoTerm( new GoId( "GO:0009698" ),
493 "phenylpropanoid metabolic process",
494 GoNameSpace.createUnassigned(),
496 experiment_set.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
497 .createUnassigned(), false ) );
498 experiment_set.add( new BasicGoTerm( new GoId( "GO:0006915" ),
500 GoNameSpace.createUnassigned(),
502 experiment_set.add( new BasicGoTerm( new GoId( "GO:0001783" ), "B cell apoptosis", GoNameSpace
503 .createUnassigned(), false ) );
504 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
505 .createUnassigned(), false ) );
506 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
507 .createUnassigned(), false ) );
508 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010658" ),
509 "striated muscle cell apoptosis",
510 GoNameSpace.createUnassigned(),
512 experiment_set.add( new BasicGoTerm( new GoId( "GO:0043065" ),
513 "positive regulation of apoptosis",
514 GoNameSpace.createUnassigned(),
517 .add( new BasicGoTerm( new GoId( "GO:0016265" ), "death", GoNameSpace.createUnassigned(), false ) );
518 categories.add( new BasicGoTerm( new GoId( "GO:0006915" ),
520 GoNameSpace.createUnassigned(),
522 categories.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
523 .createUnassigned(), false ) );
524 categories.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
525 .createUnassigned(), false ) );
526 categories.add( new BasicGoTerm( new GoId( "GO:0010658" ), "striated muscle cell apoptosis", GoNameSpace
527 .createUnassigned(), false ) );
528 categories.add( new BasicGoTerm( new GoId( "GO:0046242" ), "o-xylene biosynthetic process", GoNameSpace
529 .createUnassigned(), false ) );
530 categories.add( new BasicGoTerm( new GoId( "GO:0016326" ), "kinesin motor activity", GoNameSpace
531 .createUnassigned(), false ) );
532 categories.add( new BasicGoTerm( new GoId( "GO:0005575" ), "cellular_component", GoNameSpace
533 .createUnassigned(), false ) );
534 categories.add( new BasicGoTerm( new GoId( "GO:0032502" ), "developmental process", GoNameSpace
535 .createUnassigned(), false ) );
536 categories.add( new BasicGoTerm( new GoId( "GO:0051094" ),
537 "positive regulation of developmental process",
538 GoNameSpace.createUnassigned(),
540 categories.add( new BasicGoTerm( new GoId( "GO:0048522" ),
541 "positive regulation of cellular process",
542 GoNameSpace.createUnassigned(),
544 final Map<GoId, Integer> counts = GoUtils.countCategories( categories, experiment_set, goid_to_term_map );
// Expected counts per category for the term-based call.
546 if ( counts.get( new GoId( "GO:0016265" ) ) != 5 ) {
550 if ( counts.get( new GoId( "GO:0006915" ) ) != 5 ) {
553 // biological_process
554 if ( counts.get( new GoId( "GO:0008150" ) ) != 8 ) {
557 // muscle cell apoptosis
558 if ( counts.get( new GoId( "GO:0010657" ) ) != 3 ) {
561 // striated muscle cell apoptosis
562 if ( counts.get( new GoId( "GO:0010658" ) ) != 1 ) {
565 // o-xylene biosynthetic process
566 if ( counts.get( new GoId( "GO:0046242" ) ) != 0 ) {
569 // kinesin motor activity
570 if ( counts.get( new GoId( "GO:0016326" ) ) != 0 ) {
573 // cellular_component
574 if ( counts.get( new GoId( "GO:0005575" ) ) != 1 ) {
577 // developmental process
578 if ( counts.get( new GoId( "GO:0032502" ) ) != 5 ) {
581 // positive regulation of developmental process
582 if ( counts.get( new GoId( "GO:0051094" ) ) != 1 ) {
585 // positive regulation of cellular process
586 if ( counts.get( new GoId( "GO:0048522" ) ) != 1 ) {
// --- part 2: id-based lists ---
589 final List<GoId> categories_id = new ArrayList<GoId>();
590 final List<GoId> experiment_set_id = new ArrayList<GoId>();
591 experiment_set_id.add( new GoId( "GO:0005690" ) );
592 experiment_set_id.add( new GoId( "GO:0009698" ) );
593 experiment_set_id.add( new GoId( "GO:0008150" ) );
594 experiment_set_id.add( new GoId( "GO:0006915" ) );
595 experiment_set_id.add( new GoId( "GO:0001783" ) );
596 experiment_set_id.add( new GoId( "GO:0010657" ) );
597 experiment_set_id.add( new GoId( "GO:0010657" ) );
598 experiment_set_id.add( new GoId( "GO:0010658" ) );
599 categories_id.add( new GoId( "GO:0016265" ) );
600 categories_id.add( new GoId( "GO:0006915" ) );
601 categories_id.add( new GoId( "GO:0008150" ) );
602 categories_id.add( new GoId( "GO:0010657" ) );
603 categories_id.add( new GoId( "GO:0010658" ) );
604 categories_id.add( new GoId( "GO:0046242" ) );
605 categories_id.add( new GoId( "GO:0016326" ) );
606 categories_id.add( new GoId( "GO:0005575" ) );
607 final Map<GoId, Integer> counts_id = GoUtils.countCategoriesId( categories_id,
// Expected counts per category for the id-based call.
611 if ( counts_id.get( new GoId( "GO:0016265" ) ) != 5 ) {
615 if ( counts_id.get( new GoId( "GO:0006915" ) ) != 5 ) {
618 // biological_process
619 if ( counts_id.get( new GoId( "GO:0008150" ) ) != 7 ) {
622 // muscle cell apoptosis
623 if ( counts_id.get( new GoId( "GO:0010657" ) ) != 3 ) {
626 // striated muscle cell apoptosis
627 if ( counts_id.get( new GoId( "GO:0010658" ) ) != 1 ) {
630 // o-xylene biosynthetic process
631 if ( counts_id.get( new GoId( "GO:0046242" ) ) != 0 ) {
634 // kinesin motor activity
635 if ( counts_id.get( new GoId( "GO:0016326" ) ) != 0 ) {
639 if ( counts_id.get( new GoId( "GO:0005575" ) ) != 1 ) {
// Any exception is printed to standard output (not standard error).
643 catch ( final Exception e ) {
644 e.printStackTrace( System.out );
// Parses "gene_ontology_edit.obo" from test_dir (term count compared with 27748), builds
// a GoId-to-GoTerm map, and checks the results of GoUtils.getAllSuperGoTerms for four
// GO ids against hard-coded expectations:
//   GO:0002339 -> size compared with 2, plus membership of GO:0002376 and GO:0008150
//   GO:0030183 -> size compared with 12
//   GO:0008150 -> size compared with 0
//   GO:0006468 -> size compared with 16
// NOTE(review): this listing is missing lines (gaps in the embedded numbering): the
// "try {" line, the remaining arguments of each getAllSuperGoTerms call, the last
// constructor argument of the two BasicGoTerm instances, the if bodies and the return
// statements. Presumably each if is a failure condition returning false -- confirm.
650 private static boolean testSuperTermGetting( final File test_dir ) {
652 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
653 + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
654 final List<GoTerm> go_terms = parser.parse();
655 if ( parser.getGoTermCount() != 27748 ) {
658 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
659 final SortedSet<GoTerm> b_cell_selection = GoUtils.getAllSuperGoTerms( new GoId( "GO:0002339" ),
661 if ( b_cell_selection.size() != 2 ) {
664 if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0002376" ),
665 "immune system process",
666 GoNameSpace.createBiologicalProcess(),
670 if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0008150" ),
671 "biological process",
672 GoNameSpace.createBiologicalProcess(),
676 final SortedSet<GoTerm> b_cell_differentation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0030183" ),
678 if ( b_cell_differentation.size() != 12 ) {
681 final SortedSet<GoTerm> biological_process = GoUtils.getAllSuperGoTerms( new GoId( "GO:0008150" ),
683 if ( biological_process.size() != 0 ) {
686 final SortedSet<GoTerm> protein_aa_phosphorylation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0006468" ),
688 if ( protein_aa_phosphorylation.size() != 16 ) {
// Any exception is printed to standard output (not standard error).
692 catch ( final Exception e ) {
693 e.printStackTrace( System.out );