2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.go;
28 import java.io.BufferedReader;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.HashMap;
32 import java.util.HashSet;
33 import java.util.LinkedHashMap;
34 import java.util.List;
37 import java.util.SortedMap;
38 import java.util.SortedSet;
39 import java.util.TreeMap;
40 import java.util.TreeSet;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
44 import org.forester.util.ForesterUtil;
46 public final class GoUtils {
52 * This is for counting the how many times each GO term in 'categories'
53 * is a (direct or indirect) super term of the GO terms in 'experiment_set'.
56 * @param categories the set of super terms to be counted
57 * @param experiment_set the list of GO terms to be analyzed
58 * @param all_go_terms all terms in the ontology
61 public static LinkedHashMap<GoId, Integer> countCategories( final List<GoTerm> categories,
62 final List<GoTerm> experiment_set,
63 final Map<GoId, GoTerm> all_go_terms ) {
64 final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
65 for( final GoTerm experiment_term : experiment_set ) {
66 final Set<GoTerm> super_terms = getAllSuperGoTerms( experiment_term.getGoId(), all_go_terms );
67 super_terms.add( experiment_term );
68 for( final GoTerm cat : categories ) {
69 if ( !counts.containsKey( cat.getGoId() ) ) {
70 counts.put( cat.getGoId(), 0 );
72 if ( super_terms.contains( cat ) ) {
73 counts.put( cat.getGoId(), 1 + counts.get( cat.getGoId() ) );
80 public static LinkedHashMap<GoId, Integer> countCategoriesId( final List<GoId> categories,
81 final List<GoId> experiment_set,
82 final Map<GoId, GoTerm> all_go_terms ) {
83 final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
84 for( final GoId experiment_id : experiment_set ) {
85 final Set<GoId> super_ids = new HashSet<GoId>();
86 for( final GoTerm term : getAllSuperGoTerms( experiment_id, all_go_terms ) ) {
87 super_ids.add( term.getGoId() );
89 super_ids.add( experiment_id );
90 for( final GoId cat : categories ) {
91 if ( !counts.containsKey( cat ) ) {
94 if ( super_ids.contains( cat ) ) {
95 counts.put( cat, 1 + counts.get( cat ) );
102 public static Map<GoId, GoTerm> createGoIdToGoTermMap( final List<GoTerm> go_terms ) {
103 final Map<GoId, GoTerm> go_id_to_term_map = new HashMap<GoId, GoTerm>();
104 for( final GoTerm go_term : go_terms ) {
105 go_id_to_term_map.put( go_term.getGoId(), go_term );
106 for( final GoId alt_id : go_term.getAltIds() ) {
107 go_id_to_term_map.put( alt_id, go_term );
110 return go_id_to_term_map;
113 public static SortedSet<GoId> getAllSuperGoIds( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
114 final SortedSet<GoId> ids = new TreeSet<GoId>();
115 final SortedSet<GoTerm> terms = GoUtils.getAllSuperGoTerms( go_id, goid_to_term_map );
116 for( final GoTerm term : terms ) {
117 ids.add( term.getGoId() );
122 public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final List<GoTerm> go_terms ) {
123 final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
124 return getAllSuperGoTerms( go_id, goid_to_term_map );
127 public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
128 if ( !goid_to_term_map.containsKey( go_id ) ) {
129 throw new IllegalArgumentException( "GO id [" + go_id + "] not found in GO id to term map" );
131 final GoTerm go_term = goid_to_term_map.get( go_id );
132 return getAllSuperGoTerms( go_term, goid_to_term_map );
135 public static SortedSet<GoTerm> getAllSuperGoTerms( final GoTerm go_term, final Map<GoId, GoTerm> goid_to_term_map ) {
136 final SortedSet<GoTerm> supers = new TreeSet<GoTerm>();
137 getAllSuperGoTerms( go_term, goid_to_term_map, supers );
141 private static void getAllSuperGoTerms( final GoTerm go_term,
142 final Map<GoId, GoTerm> goid_to_term_map,
143 final Set<GoTerm> supers ) {
144 if ( ( go_term.getSuperGoIds() != null ) && ( go_term.getSuperGoIds().size() > 0 ) ) {
145 for( final GoId super_go_id : go_term.getSuperGoIds() ) {
146 if ( !goid_to_term_map.containsKey( super_go_id ) ) {
147 throw new IllegalArgumentException( "GO id [" + super_go_id + "] not found in GO id to term map" );
149 final GoTerm super_go_term = goid_to_term_map.get( super_go_id );
150 supers.add( super_go_term );
151 getAllSuperGoTerms( super_go_term, goid_to_term_map, supers );
156 public static GoTerm getPenultimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
157 GoTerm my_go_term = go_term;
158 GoTerm penultimate = my_go_term;
159 while ( ( my_go_term.getSuperGoIds() != null ) && ( my_go_term.getSuperGoIds().size() > 0 ) ) {
160 penultimate = my_go_term;
161 if ( !map.containsKey( my_go_term.getSuperGoIds().get( 0 ) ) ) {
162 throw new IllegalArgumentException( "GO-id [" + my_go_term.getSuperGoIds().get( 0 )
163 + "] not found in map" );
165 my_go_term = map.get( my_go_term.getSuperGoIds().get( 0 ) );
170 public static GoTerm getUltimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
171 GoTerm my_go_term = go_term;
172 while ( ( my_go_term.getSuperGoIds() != null ) && ( my_go_term.getSuperGoIds().size() > 0 ) ) {
173 if ( !map.containsKey( my_go_term.getSuperGoIds().get( 0 ) ) ) {
174 throw new IllegalArgumentException( "GO-id [" + my_go_term.getSuperGoIds().get( 0 )
175 + "] not found in map" );
177 my_go_term = map.get( my_go_term.getSuperGoIds().get( 0 ) );
182 public static SortedMap<String, List<GoId>> parseGoIds( final Object source,
183 final String start_of_comment_line,
184 final String start_of_label_line ) throws IOException {
185 final Pattern label_matcher = Pattern.compile( start_of_label_line + "\\s*(.+?)" );
186 final BufferedReader reader = ForesterUtil.obtainReader( source );
187 final SortedMap<String, List<GoId>> results = new TreeMap<String, List<GoId>>();
190 final boolean use_label = !ForesterUtil.isEmpty( start_of_label_line );
191 final boolean use_comment = !ForesterUtil.isEmpty( start_of_comment_line );
192 List<GoId> current_list = new ArrayList<GoId>();
193 while ( ( line = reader.readLine() ) != null ) {
195 if ( ForesterUtil.isEmpty( line ) || ( use_comment && line.startsWith( start_of_comment_line ) ) ) {
198 else if ( use_label && line.startsWith( start_of_label_line ) ) {
199 final Matcher matcher = label_matcher.matcher( line );
200 if ( matcher.matches() ) {
201 if ( !ForesterUtil.isEmpty( label ) ) {
202 results.put( label, current_list );
203 current_list = new ArrayList<GoId>();
205 label = matcher.group( 1 );
209 final String[] s = line.split( "\\s+" );
210 final GoId id = new GoId( s[ 0 ] );
211 current_list.add( id );
214 if ( ForesterUtil.isEmpty( label ) ) {
217 results.put( label, current_list );