2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.go;
29 import java.util.ArrayList;
30 import java.util.List;
32 import java.util.SortedSet;
34 import org.forester.protein.DomainId;
35 import org.forester.util.ForesterUtil;
// Tolerance used by isEqual: two doubles whose absolute difference is below this value
// are treated as equal.
39 private final static double ZERO_DIFF = 1.0E-9;
// Approximate equality for doubles: returns true when |a - b| is strictly smaller than ZERO_DIFF.
41 public static boolean isEqual( final double a, final double b ) {
42 return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
// Entry point of the GO tests. For each test routine it prints a label, then "failed." inside
// the guard that fires when the routine returns false, and "OK." after the guard.
// Routines run in this order: GO ID, namespace, basic GO term, OBO parser, Pfam-to-GO mapping,
// Pfam-to-GO parser, super terms, super term counting.
//
// test_dir is passed on to the file-based routines (testOBOparser, testPfamToGoParser,
// testSuperTermGetting, testSuperTermCounting).
//
// NOTE(review): this listing has gaps -- the statements following each "failed." line, the
// closing braces and the final return are not visible. Presumably each failure returns false
// and the method returns true at the end; confirm against the full file.
45 public static boolean test( final File test_dir ) {
46 System.out.print( " GO ID: " );
// NOTE(review): the condition guarding this first "failed." is not visible here; presumably
// it is !testGoId(), by analogy with the guards below -- confirm.
48 System.out.println( "failed." );
51 System.out.println( "OK." );
52 System.out.print( " Namespace: " );
53 if ( !testNamespace() ) {
54 System.out.println( "failed." );
57 System.out.println( "OK." );
58 System.out.print( " Basic GO term: " );
59 if ( !testBasicGoTerm() ) {
60 System.out.println( "failed." );
63 System.out.println( "OK." );
64 System.out.print( " OBO parser: " );
65 if ( !testOBOparser( test_dir ) ) {
66 System.out.println( "failed." );
69 System.out.println( "OK." );
70 System.out.print( " Pfam to GO mapping: " );
71 if ( !testPfamToGoMapping() ) {
72 System.out.println( "failed." );
75 System.out.println( "OK." );
76 System.out.print( " Pfam to GO parser: " );
77 if ( !testPfamToGoParser( test_dir ) ) {
78 System.out.println( "failed." );
81 System.out.println( "OK." );
82 System.out.print( " Super terms: " );
83 if ( !testSuperTermGetting( test_dir ) ) {
84 System.out.println( "failed." );
87 System.out.println( "OK." );
88 System.out.print( " Super term counting: " );
89 if ( !testSuperTermCounting( test_dir ) ) {
90 System.out.println( "failed." );
93 System.out.println( "OK." );
// Exercises equals(), hashCode() and copy() of BasicGoTerm using five terms that differ from
// gt1 in exactly one visible constructor argument each.
//
// NOTE(review): the statement guarded by each `if` below is not visible in this listing;
// presumably it is a failure return (return false) -- confirm against the full file.
97 private static boolean testBasicGoTerm() {
// gt1 and gt2 share GO id, name and namespace (the last constructor argument is not visible).
99 final GoTerm gt1 = new BasicGoTerm( "GO:0047579",
100 "4-hydroxymandelate oxidase activity",
101 "molecular_function",
103 final GoTerm gt2 = new BasicGoTerm( "GO:0047579",
104 "4-hydroxymandelate oxidase activity",
105 "molecular_function",
// gt3: same GO id and namespace as gt1, but a different name ("?") and last argument true.
107 final GoTerm gt3 = new BasicGoTerm( "GO:0047579", "?", "molecular_function", true );
// gt4: same GO id and name as gt1, different namespace string.
108 final GoTerm gt4 = new BasicGoTerm( "GO:0047579",
109 "4-hydroxymandelate oxidase activity",
110 "biological_process",
// gt5: same name and namespace as gt1, different GO id.
112 final GoTerm gt5 = new BasicGoTerm( "GO:0047578",
113 "4-hydroxymandelate oxidase activity",
114 "molecular_function",
// Guard fires unless gt1 equals gt2 (identical visible arguments).
116 if ( !gt1.equals( gt2 ) ) {
// Guard fires unless gt1 equals gt3, i.e. the differing name is expected not to affect equals().
119 if ( !gt1.equals( gt3 ) ) {
// Guard fires if gt1 equals gt4, i.e. a differing namespace is expected to make terms unequal.
122 if ( gt1.equals( gt4 ) ) {
// Guard fires if the hash codes of gt1 and gt4 differ, although the two are expected to be unequal.
125 if ( gt1.hashCode() != gt4.hashCode() ) {
// Guard fires if gt1 equals gt5, i.e. a differing GO id is expected to make terms unequal.
128 if ( gt1.equals( gt5 ) ) {
// A copy must compare equal to the term it was copied from.
131 final GoTerm gt6 = ( GoTerm ) gt5.copy();
132 if ( !gt6.equals( gt5 ) ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
136 catch ( final Exception e ) {
137 e.printStackTrace( System.out );
// Exercises equals(), toString() and hashCode() of GoId with one distinct id (id1) and two
// ids built from the same string (id2, id3).
//
// NOTE(review): the statement guarded by each `if` below is not visible in this listing;
// presumably it is a failure return (return false) -- confirm against the full file.
143 private static boolean testGoId() {
145 final GoId id1 = new GoId( "GO:0042617" );
146 final GoId id2 = new GoId( "GO:0042630" );
147 final GoId id3 = new GoId( "GO:0042630" );
// Ids built from different strings must not be equal.
148 if ( id1.equals( id2 ) ) {
// Ids built from the same string must be equal.
151 if ( !id2.equals( id3 ) ) {
// toString() must give back the string the id was built from.
154 if ( !id1.toString().equals( "GO:0042617" ) ) {
// Equal ids must have equal hash codes.
157 if ( id2.hashCode() != id3.hashCode() ) {
// For these two particular ids the hash codes are additionally expected to differ.
160 if ( id1.hashCode() == id2.hashCode() ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
164 catch ( final Exception e ) {
165 e.printStackTrace( System.out );
// Exercises GoNameSpace construction from strings and from the GoNamespaceType enum, plus
// equals() and toString().
//
// NOTE(review): the statement guarded by each `if` below is not visible in this listing;
// presumably it is a failure return (return false) -- confirm against the full file.
171 private static boolean testNamespace() {
// b and c are built from mixed-case strings; m from an all-lower-case string; m2 from the enum.
173 final GoNameSpace b = new GoNameSpace( "Biological_process" );
174 final GoNameSpace c = new GoNameSpace( "Cellular_Component" );
175 final GoNameSpace m = new GoNameSpace( "molecular_function" );
176 final GoNameSpace m2 = new GoNameSpace( GoNameSpace.GoNamespaceType.MOLECULAR_FUNCTION );
// Different namespaces must not be equal.
177 if ( b.equals( c ) ) {
// String-built and enum-built namespaces of the same kind must be equal.
180 if ( !m.equals( m2 ) ) {
// toString() is compared against the all-lower-case names, even for b and c, which were
// constructed from mixed-case input.
183 if ( !b.toString().equals( "biological_process" ) ) {
186 if ( !c.toString().equals( "cellular_component" ) ) {
189 if ( !m.toString().equals( "molecular_function" ) ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
193 catch ( final Exception e ) {
194 e.printStackTrace( System.out );
// Parses the file "obo_test" located in test_dir with OBOparser (return type BASIC_GO_TERM)
// and checks the number of parsed terms plus selected fields of four of them.
//
// NOTE(review): the statement guarded by each `if` below is not visible in this listing;
// presumably it is a failure return (return false) -- confirm against the full file.
200 private static boolean testOBOparser( final File test_dir ) {
202 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator() + "obo_test" ),
203 OBOparser.ReturnType.BASIC_GO_TERM );
204 final List<GoTerm> go_terms = parser.parse();
// The fixture is expected to contain exactly 26 terms.
205 if ( parser.getGoTermCount() != 26 ) {
// Note the naming: g3 is the term at index 2, g2 is the last term (index 25).
208 final GoTerm g0 = go_terms.get( 0 );
209 final GoTerm g1 = go_terms.get( 1 );
210 final GoTerm g3 = go_terms.get( 2 );
211 final GoTerm g2 = go_terms.get( 25 );
// --- First term (g0): empty comment, full definition text, id, namespace, no relationships,
// no cross references, name, two super GO ids.
212 if ( !g0.getComment().equals( "" ) ) {
215 if ( !g0.getDefinition()
216 .equals( "\"The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton.\" [GOC:mcc, PMID:10873824, PMID:11389764]" ) ) {
219 if ( !g0.getGoId().getId().equals( "GO:0000001" ) ) {
// NOTE(review): the argument here is the GoNamespaceType enum constant, not a GoNameSpace
// object, so the guard fires only if the namespace object equals the bare enum value.
// Confirm this type mismatch is intentional.
222 if ( g0.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
225 if ( g0.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
228 if ( g0.getGoRelationships().size() != 0 ) {
231 if ( g0.getGoXRefs().size() != 0 ) {
234 if ( !g0.getName().equals( "mitochondrion inheritance" ) ) {
237 if ( g0.getSuperGoIds().size() != 2 ) {
// NOTE(review): this guard fires when g0 is NOT flagged obsolete, whereas the corresponding
// check on g1 below fires when g1 IS obsolete. Confirm against the obo_test fixture that g0
// is really expected to be obsolete.
240 if ( !g0.isObsolete() ) {
// --- Second term (g1): comment, definition, id, namespace, one relationship, five cross
// references, name, one super GO id, not obsolete.
243 if ( !g1.getComment().equals( "comment" ) ) {
246 if ( !g1.getDefinition()
247 .equals( "\"The maintenance of the structure and integrity of the mitochondrial genome.\" [GOC:ai]" ) ) {
250 if ( !g1.getGoId().getId().equals( "GO:0000002" ) ) {
// NOTE(review): same enum-versus-object comparison as for g0 above.
253 if ( g1.getGoNameSpace().equals( GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) ) {
256 if ( g1.getGoNameSpace().getType() != GoNameSpace.GoNamespaceType.BIOLOGICAL_PROCESS ) {
259 if ( g1.getGoRelationships().size() != 1 ) {
262 if ( g1.getGoXRefs().size() != 5 ) {
265 if ( !g1.getName().equals( "mitochondrial genome maintenance" ) ) {
268 if ( g1.getSuperGoIds().size() != 1 ) {
271 if ( g1.isObsolete() ) {
// Cross references of g1: the first one must equal "EC:2.4.1.-", expose "2.4.1.-" as its
// value and EC as its type, and must differ from a reference with another value or another
// type prefix.
274 if ( !g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.-" ) ) ) {
277 if ( !g1.getGoXRefs().get( 0 ).getXRef().equals( "2.4.1.-" ) ) {
280 if ( g1.getGoXRefs().get( 0 ).getType() != GoXRef.Type.EC ) {
283 if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "EC:2.4.1.1" ) ) ) {
286 if ( g1.getGoXRefs().get( 0 ).equals( new BasicGoXRef( "Reactome:2.4.1.-" ) ) ) {
// Remaining cross references are checked positionally (indices 1 to 4).
289 if ( !g1.getGoXRefs().get( 1 ).equals( new BasicGoXRef( "Reactome:7672" ) ) ) {
292 if ( !g1.getGoXRefs().get( 2 ).equals( new BasicGoXRef( "MetaCyc:SIROHEME-FERROCHELAT-RXN" ) ) ) {
295 if ( !g1.getGoXRefs().get( 3 ).equals( new BasicGoXRef( "RESID:AA02376" ) ) ) {
298 if ( !g1.getGoXRefs().get( 4 ).equals( new BasicGoXRef( "UM-BBD_enzymeID:e0271" ) ) ) {
// The single relationship of g1: equality, target GO id, type PART_OF, and inequality with a
// relationship pointing to a different id.
301 if ( !g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:0007052" ) ) ) {
304 if ( !g1.getGoRelationships().get( 0 ).getGoId().equals( new GoId( "GO:0007052" ) ) ) {
307 if ( !g1.getGoRelationships().get( 0 ).getGoId().getId().equals( "GO:0007052" ) ) {
310 if ( g1.getGoRelationships().get( 0 ).getType() != GoRelationship.Type.PART_OF ) {
313 if ( g1.getGoRelationships().get( 0 ).equals( new BasicGoRelationship( "part_of GO:1007052" ) ) ) {
// The single super GO id of g1 (positive and negative check).
316 if ( !g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:0007005" ) ) ) {
319 if ( g1.getSuperGoIds().get( 0 ).equals( new GoId( "GO:1007005" ) ) ) {
// --- Last term (g2): id checked both as string and as GoId, positively and negatively.
322 if ( !g2.getGoId().getId().equals( "GO:0000030" ) ) {
325 if ( !g2.getGoId().equals( new GoId( "GO:0000030" ) ) ) {
328 if ( g2.getGoId().getId().equals( "GO:0000031" ) ) {
331 if ( g2.getGoId().equals( new GoId( "GO:0000031" ) ) ) {
// --- Third term (g3): exactly three subsets, which must include the three listed and must
// not include "goslim_candida".
334 if ( g3.getGoSubsets().size() != 3 ) {
337 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_generic" ) ) ) {
340 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "goslim_plant" ) ) ) {
343 if ( !g3.getGoSubsets().contains( new BasicGoSubset( "gosubset_prok" ) ) ) {
346 if ( g3.getGoSubsets().contains( new BasicGoSubset( "goslim_candida" ) ) ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
350 catch ( final Exception e ) {
351 e.printStackTrace( System.out );
// Exercises equals() and compareTo() of PfamToGoMapping using mappings that combine two
// domain ids ("A", "B") with two GO ids.
//
// NOTE(review): the statement guarded by each `if` below is not visible in this listing;
// presumably it is a failure return (return false) -- confirm against the full file.
357 private static boolean testPfamToGoMapping() {
// pg0 and pg1 are identical; pg2 differs in domain id, pg3 in GO id, pg4 in both.
359 final PfamToGoMapping pg0 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000001" ) );
360 final PfamToGoMapping pg1 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000001" ) );
361 final PfamToGoMapping pg2 = new PfamToGoMapping( new DomainId( "B" ), new GoId( "GO:0000001" ) );
362 final PfamToGoMapping pg3 = new PfamToGoMapping( new DomainId( "A" ), new GoId( "GO:0000002" ) );
363 final PfamToGoMapping pg4 = new PfamToGoMapping( new DomainId( "B" ), new GoId( "GO:0000002" ) );
// Reflexivity, then equality of two mappings built from the same values.
364 if ( !pg0.equals( pg0 ) ) {
367 if ( !pg0.equals( pg1 ) ) {
// A difference in the domain id, the GO id, or both must make mappings unequal.
370 if ( pg0.equals( pg2 ) ) {
373 if ( pg0.equals( pg3 ) ) {
376 if ( pg0.equals( pg4 ) ) {
// pg0 and pg3 share the domain id and differ only in GO id; compareTo is expected to return 0
// for them even though equals() (checked above) is expected to be false.
// NOTE(review): this means compareTo is expected not to be consistent with equals -- confirm.
379 if ( pg0.compareTo( pg3 ) != 0 ) {
// Domain "A" is expected to order strictly before domain "B", in both call directions.
382 if ( pg0.compareTo( pg2 ) >= 0 ) {
385 if ( pg2.compareTo( pg0 ) <= 0 ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
389 catch ( final Exception e ) {
390 e.printStackTrace( System.out );
// Parses the file "pfam_to_go_test" located in test_dir with PfamToGoParser and checks the
// number of mappings plus the key (DomainId) and value (GoId) of the first six and the last
// two mappings.
//
// NOTE(review): the statement guarded by each `if` below is not visible in this listing;
// presumably it is a failure return (return false) -- confirm against the full file.
396 private static boolean testPfamToGoParser( final File test_dir ) {
398 final PfamToGoParser parser = new PfamToGoParser( new File( test_dir + ForesterUtil.getFileSeparator()
399 + "pfam_to_go_test" ) );
400 final List<PfamToGoMapping> mappings = parser.parse();
// Both the parser's own counter and the returned list must report 426 mappings.
401 if ( parser.getMappingCount() != 426 ) {
404 if ( mappings.size() != 426 ) {
407 final PfamToGoMapping m0 = mappings.get( 0 );
408 final PfamToGoMapping m1 = mappings.get( 1 );
409 final PfamToGoMapping m2 = mappings.get( 2 );
410 final PfamToGoMapping m3 = mappings.get( 3 );
411 final PfamToGoMapping m4 = mappings.get( 4 );
412 final PfamToGoMapping m5 = mappings.get( 5 );
413 final PfamToGoMapping m424 = mappings.get( 424 );
414 final PfamToGoMapping m425 = mappings.get( 425 );
// First mapping: positive checks on key and value, followed by negative checks against a
// different domain id and a different GO id.
415 if ( !m0.getKey().equals( new DomainId( "7tm_1" ) ) ) {
418 if ( !m0.getValue().equals( new GoId( "GO:0001584" ) ) ) {
421 if ( m0.getKey().equals( new DomainId( "7tm_x" ) ) ) {
424 if ( m0.getValue().equals( new GoId( "GO:0001585" ) ) ) {
// Mappings 1 to 5: expected key and value at each position.
427 if ( !m1.getKey().equals( new DomainId( "7tm_1" ) ) ) {
430 if ( !m1.getValue().equals( new GoId( "GO:0007186" ) ) ) {
433 if ( !m2.getKey().equals( new DomainId( "7tm_1" ) ) ) {
436 if ( !m2.getValue().equals( new GoId( "GO:0016021" ) ) ) {
439 if ( !m3.getKey().equals( new DomainId( "7tm_2" ) ) ) {
442 if ( !m3.getValue().equals( new GoId( "GO:0004930" ) ) ) {
445 if ( !m4.getKey().equals( new DomainId( "7tm_2" ) ) ) {
448 if ( !m4.getValue().equals( new GoId( "GO:0016020" ) ) ) {
451 if ( !m5.getKey().equals( new DomainId( "7tm_3" ) ) ) {
454 if ( !m5.getValue().equals( new GoId( "GO:0008067" ) ) ) {
// Last two mappings (indices 424 and 425).
457 if ( !m424.getKey().equals( new DomainId( "OMPdecase" ) ) ) {
460 if ( !m424.getValue().equals( new GoId( "GO:0006207" ) ) ) {
463 if ( !m425.getKey().equals( new DomainId( "Bac_DNA_binding" ) ) ) {
466 if ( !m425.getValue().equals( new GoId( "GO:0003677" ) ) ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
470 catch ( final Exception e ) {
471 e.printStackTrace( System.out );
// Parses "gene_ontology_edit.obo" from test_dir and checks the per-category counts returned by
// GoUtils.countCategories (GoTerm-based) and GoUtils.countCategoriesId (GoId-based) for a
// fixed experiment set and a fixed list of categories.
//
// NOTE(review): this listing has gaps -- the statement guarded by each `if`, some constructor
// arguments and some call arguments are not visible. Presumably each guard is followed by a
// failure return (return false); confirm against the full file.
477 private static boolean testSuperTermCounting( final File test_dir ) {
479 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
480 + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
481 final List<GoTerm> all_go_terms = parser.parse();
// The ontology fixture is expected to contain exactly 27748 terms.
482 if ( parser.getGoTermCount() != 27748 ) {
485 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
486 final List<GoTerm> categories = new ArrayList<GoTerm>();
487 final List<GoTerm> experiment_set = new ArrayList<GoTerm>();
// --- Experiment set (GoTerm-based). All terms are created with an unassigned namespace.
488 experiment_set.add( new BasicGoTerm( new GoId( "GO:0005690" ), "snRNP U4atac", GoNameSpace
489 .createUnassigned(), false ) );
490 experiment_set.add( new BasicGoTerm( new GoId( "GO:0009698" ),
491 "phenylpropanoid metabolic process",
492 GoNameSpace.createUnassigned(),
494 experiment_set.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
495 .createUnassigned(), false ) );
// The name argument of this term is not visible in this listing.
496 experiment_set.add( new BasicGoTerm( new GoId( "GO:0006915" ),
498 GoNameSpace.createUnassigned(),
500 experiment_set.add( new BasicGoTerm( new GoId( "GO:0001783" ), "B cell apoptosis", GoNameSpace
501 .createUnassigned(), false ) );
// GO:0010657 is added to the experiment set twice.
502 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
503 .createUnassigned(), false ) );
504 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
505 .createUnassigned(), false ) );
506 experiment_set.add( new BasicGoTerm( new GoId( "GO:0010658" ),
507 "striated muscle cell apoptosis",
508 GoNameSpace.createUnassigned(),
510 experiment_set.add( new BasicGoTerm( new GoId( "GO:0043065" ),
511 "positive regulation of apoptosis",
512 GoNameSpace.createUnassigned(),
// --- Categories (GoTerm-based).
// NOTE(review): the receiver of the following .add call is not visible in this listing;
// presumably it is `categories`, since GO:0016265 is queried in the counts below -- confirm.
515 .add( new BasicGoTerm( new GoId( "GO:0016265" ), "death", GoNameSpace.createUnassigned(), false ) );
// The name argument of this category is not visible in this listing.
516 categories.add( new BasicGoTerm( new GoId( "GO:0006915" ),
518 GoNameSpace.createUnassigned(),
520 categories.add( new BasicGoTerm( new GoId( "GO:0008150" ), "biological_process", GoNameSpace
521 .createUnassigned(), false ) );
522 categories.add( new BasicGoTerm( new GoId( "GO:0010657" ), "muscle cell apoptosis", GoNameSpace
523 .createUnassigned(), false ) );
524 categories.add( new BasicGoTerm( new GoId( "GO:0010658" ), "striated muscle cell apoptosis", GoNameSpace
525 .createUnassigned(), false ) );
526 categories.add( new BasicGoTerm( new GoId( "GO:0046242" ), "o-xylene biosynthetic process", GoNameSpace
527 .createUnassigned(), false ) );
528 categories.add( new BasicGoTerm( new GoId( "GO:0016326" ), "kinesin motor activity", GoNameSpace
529 .createUnassigned(), false ) );
530 categories.add( new BasicGoTerm( new GoId( "GO:0005575" ), "cellular_component", GoNameSpace
531 .createUnassigned(), false ) );
532 categories.add( new BasicGoTerm( new GoId( "GO:0032502" ), "developmental process", GoNameSpace
533 .createUnassigned(), false ) );
534 categories.add( new BasicGoTerm( new GoId( "GO:0051094" ),
535 "positive regulation of developmental process",
536 GoNameSpace.createUnassigned(),
538 categories.add( new BasicGoTerm( new GoId( "GO:0048522" ),
539 "positive regulation of cellular process",
540 GoNameSpace.createUnassigned(),
// --- GoTerm-based counting: one expected count per category id.
542 final Map<GoId, Integer> counts = GoUtils.countCategories( categories, experiment_set, goid_to_term_map );
// GO:0016265 (named "death" where it is constructed above)
544 if ( counts.get( new GoId( "GO:0016265" ) ) != 5 ) {
// GO:0006915 (its name argument is not visible in this listing)
548 if ( counts.get( new GoId( "GO:0006915" ) ) != 5 ) {
551 // biological_process
552 if ( counts.get( new GoId( "GO:0008150" ) ) != 8 ) {
555 // muscle cell apoptosis
556 if ( counts.get( new GoId( "GO:0010657" ) ) != 3 ) {
559 // striated muscle cell apoptosis
560 if ( counts.get( new GoId( "GO:0010658" ) ) != 1 ) {
563 // o-xylene biosynthetic process
564 if ( counts.get( new GoId( "GO:0046242" ) ) != 0 ) {
567 // kinesin motor activity
568 if ( counts.get( new GoId( "GO:0016326" ) ) != 0 ) {
571 // cellular_component
572 if ( counts.get( new GoId( "GO:0005575" ) ) != 1 ) {
575 // developmental process
576 if ( counts.get( new GoId( "GO:0032502" ) ) != 5 ) {
579 // positive regulation of developmental process
580 if ( counts.get( new GoId( "GO:0051094" ) ) != 1 ) {
583 // positive regulation of cellular process
584 if ( counts.get( new GoId( "GO:0048522" ) ) != 1 ) {
// --- GoId-based counting. The id experiment set repeats the ids used above except
// GO:0043065, and the id category list stops at GO:0005575 (no GO:0032502, GO:0051094,
// GO:0048522).
587 final List<GoId> categories_id = new ArrayList<GoId>();
588 final List<GoId> experiment_set_id = new ArrayList<GoId>();
589 experiment_set_id.add( new GoId( "GO:0005690" ) );
590 experiment_set_id.add( new GoId( "GO:0009698" ) );
591 experiment_set_id.add( new GoId( "GO:0008150" ) );
592 experiment_set_id.add( new GoId( "GO:0006915" ) );
593 experiment_set_id.add( new GoId( "GO:0001783" ) );
// GO:0010657 is again added twice.
594 experiment_set_id.add( new GoId( "GO:0010657" ) );
595 experiment_set_id.add( new GoId( "GO:0010657" ) );
596 experiment_set_id.add( new GoId( "GO:0010658" ) );
597 categories_id.add( new GoId( "GO:0016265" ) );
598 categories_id.add( new GoId( "GO:0006915" ) );
599 categories_id.add( new GoId( "GO:0008150" ) );
600 categories_id.add( new GoId( "GO:0010657" ) );
601 categories_id.add( new GoId( "GO:0010658" ) );
602 categories_id.add( new GoId( "GO:0046242" ) );
603 categories_id.add( new GoId( "GO:0016326" ) );
604 categories_id.add( new GoId( "GO:0005575" ) );
// The remaining arguments of this call are not visible in this listing.
605 final Map<GoId, Integer> counts_id = GoUtils.countCategoriesId( categories_id,
// GO:0016265
609 if ( counts_id.get( new GoId( "GO:0016265" ) ) != 5 ) {
// GO:0006915
613 if ( counts_id.get( new GoId( "GO:0006915" ) ) != 5 ) {
616 // biological_process
// Expected count is 7 here versus 8 in the GoTerm-based run above.
617 if ( counts_id.get( new GoId( "GO:0008150" ) ) != 7 ) {
620 // muscle cell apoptosis
621 if ( counts_id.get( new GoId( "GO:0010657" ) ) != 3 ) {
624 // striated muscle cell apoptosis
625 if ( counts_id.get( new GoId( "GO:0010658" ) ) != 1 ) {
628 // o-xylene biosynthetic process
629 if ( counts_id.get( new GoId( "GO:0046242" ) ) != 0 ) {
632 // kinesin motor activity
633 if ( counts_id.get( new GoId( "GO:0016326" ) ) != 0 ) {
// GO:0005575 (named "cellular_component" where the GoTerm-based category is built above)
637 if ( counts_id.get( new GoId( "GO:0005575" ) ) != 1 ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
641 catch ( final Exception e ) {
642 e.printStackTrace( System.out );
// Parses "gene_ontology_edit.obo" from test_dir and checks the size (and for one case the
// content) of the sets returned by GoUtils.getAllSuperGoTerms for four GO ids.
//
// NOTE(review): this listing has gaps -- the second argument of every getAllSuperGoTerms call,
// the trailing arguments of the BasicGoTerm constructors and the statement guarded by each
// `if` are not visible. Presumably the second argument is goid_to_term_map and each guard is
// followed by a failure return (return false); confirm against the full file.
648 private static boolean testSuperTermGetting( final File test_dir ) {
650 final OBOparser parser = new OBOparser( new File( test_dir + ForesterUtil.getFileSeparator()
651 + "gene_ontology_edit.obo" ), OBOparser.ReturnType.BASIC_GO_TERM );
652 final List<GoTerm> go_terms = parser.parse();
// The ontology fixture is expected to contain exactly 27748 terms.
653 if ( parser.getGoTermCount() != 27748 ) {
656 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
// GO:0002339: exactly two super terms are expected, GO:0002376 and GO:0008150.
657 final SortedSet<GoTerm> b_cell_selection = GoUtils.getAllSuperGoTerms( new GoId( "GO:0002339" ),
659 if ( b_cell_selection.size() != 2 ) {
662 if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0002376" ),
663 "immune system process",
664 GoNameSpace.createBiologicalProcess(),
// The probe term is named "biological process" (with a space) here.
668 if ( !b_cell_selection.contains( new BasicGoTerm( new GoId( "GO:0008150" ),
669 "biological process",
670 GoNameSpace.createBiologicalProcess(),
// GO:0030183: twelve super terms are expected.
674 final SortedSet<GoTerm> b_cell_differentation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0030183" ),
676 if ( b_cell_differentation.size() != 12 ) {
// GO:0008150: no super terms are expected.
679 final SortedSet<GoTerm> biological_process = GoUtils.getAllSuperGoTerms( new GoId( "GO:0008150" ),
681 if ( biological_process.size() != 0 ) {
// GO:0006468: sixteen super terms are expected.
684 final SortedSet<GoTerm> protein_aa_phosphorylation = GoUtils.getAllSuperGoTerms( new GoId( "GO:0006468" ),
686 if ( protein_aa_phosphorylation.size() != 16 ) {
// Any exception is reported by printing its stack trace to standard output; what follows
// the print is not visible here.
690 catch ( final Exception e ) {
691 e.printStackTrace( System.out );