// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.util.ForesterUtil; public final class GoUtils { private GoUtils() { } /** * This is for counting the how many times each GO term in 'categories' * is a (direct or indirect) super term of the GO terms in 'experiment_set'. * * * @param categories the set of super terms to be counted * @param experiment_set the list of GO terms to be analyzed * @param all_go_terms all terms in the ontology * @return */ public static LinkedHashMap countCategories( final List categories, final List experiment_set, final Map all_go_terms ) { final LinkedHashMap counts = new LinkedHashMap(); for( final GoTerm experiment_term : experiment_set ) { final Set super_terms = getAllSuperGoTerms( experiment_term.getGoId(), all_go_terms ); super_terms.add( experiment_term ); for( final GoTerm cat : categories ) { if ( !counts.containsKey( cat.getGoId() ) ) { counts.put( cat.getGoId(), 0 ); } if ( super_terms.contains( cat ) ) { counts.put( cat.getGoId(), 1 + counts.get( cat.getGoId() ) ); } } } return counts; } public static LinkedHashMap countCategoriesId( final List categories, final List experiment_set, final Map all_go_terms ) { final LinkedHashMap counts = new LinkedHashMap(); for( final GoId experiment_id : experiment_set ) { final Set super_ids = new HashSet(); for( final GoTerm term : getAllSuperGoTerms( experiment_id, all_go_terms ) ) { super_ids.add( term.getGoId() ); } super_ids.add( experiment_id ); for( final GoId cat : categories ) { if ( !counts.containsKey( cat ) ) { counts.put( cat, 0 ); } if ( super_ids.contains( cat ) ) { counts.put( cat, 1 + counts.get( cat ) ); } } } return counts; } public static Map createGoIdToGoTermMap( final List go_terms ) { final Map go_id_to_term_map = new HashMap(); for( final GoTerm go_term : go_terms ) { go_id_to_term_map.put( go_term.getGoId(), go_term ); for( final GoId alt_id : go_term.getAltIds() ) { go_id_to_term_map.put( alt_id, go_term ); } } return go_id_to_term_map; } public static SortedSet getAllSuperGoIds( final GoId go_id, final Map goid_to_term_map ) { final SortedSet ids = new TreeSet(); final SortedSet terms = GoUtils.getAllSuperGoTerms( go_id, goid_to_term_map ); for( final GoTerm term : terms ) { ids.add( term.getGoId() ); } return ids; } public static SortedSet getAllSuperGoTerms( final GoId go_id, final List go_terms ) { final Map goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms ); return getAllSuperGoTerms( go_id, goid_to_term_map ); } public static SortedSet getAllSuperGoTerms( final GoId go_id, final Map goid_to_term_map ) { if ( !goid_to_term_map.containsKey( go_id ) ) { throw new IllegalArgumentException( "GO id [" + go_id + "] not found in GO id to term map" ); } final GoTerm go_term = goid_to_term_map.get( go_id ); return getAllSuperGoTerms( go_term, goid_to_term_map ); } public static SortedSet getAllSuperGoTerms( final GoTerm go_term, final Map goid_to_term_map ) { final SortedSet supers = new TreeSet(); getAllSuperGoTerms( go_term, goid_to_term_map, supers ); return supers; } private static void getAllSuperGoTerms( final GoTerm go_term, final Map goid_to_term_map, final Set supers ) { if ( ( go_term.getSuperGoIds() != null ) && ( go_term.getSuperGoIds().size() > 0 ) ) { for( final GoId super_go_id : go_term.getSuperGoIds() ) { if ( !goid_to_term_map.containsKey( super_go_id ) ) { throw new IllegalArgumentException( "GO id [" + super_go_id + "] not found in GO id to term map" ); } final GoTerm super_go_term = goid_to_term_map.get( super_go_id ); supers.add( super_go_term ); getAllSuperGoTerms( super_go_term, goid_to_term_map, supers ); } } } public static GoTerm getPenultimateGoTerm( final GoTerm go_term, final Map map ) { GoTerm my_go_term = go_term; GoTerm penultimate = my_go_term; while ( ( my_go_term.getSuperGoIds() != null ) && ( my_go_term.getSuperGoIds().size() > 0 ) ) { penultimate = my_go_term; if ( !map.containsKey( my_go_term.getSuperGoIds().get( 0 ) ) ) { throw new IllegalArgumentException( "GO-id [" + my_go_term.getSuperGoIds().get( 0 ) + "] not found in map" ); } my_go_term = map.get( my_go_term.getSuperGoIds().get( 0 ) ); } return penultimate; } public static GoTerm getUltimateGoTerm( final GoTerm go_term, final Map map ) { GoTerm my_go_term = go_term; while ( ( my_go_term.getSuperGoIds() != null ) && ( my_go_term.getSuperGoIds().size() > 0 ) ) { if ( !map.containsKey( my_go_term.getSuperGoIds().get( 0 ) ) ) { throw new IllegalArgumentException( "GO-id [" + my_go_term.getSuperGoIds().get( 0 ) + "] not found in map" ); } my_go_term = map.get( my_go_term.getSuperGoIds().get( 0 ) ); } return my_go_term; } public static SortedMap> parseGoIds( final Object source, final String start_of_comment_line, final String start_of_label_line ) throws IOException { final Pattern label_matcher = Pattern.compile( start_of_label_line + "\\s*(.+?)" ); final BufferedReader reader = ForesterUtil.obtainReader( source ); final SortedMap> results = new TreeMap>(); String line = ""; String label = ""; final boolean use_label = !ForesterUtil.isEmpty( start_of_label_line ); final boolean use_comment = !ForesterUtil.isEmpty( start_of_comment_line ); List current_list = new ArrayList(); while ( ( line = reader.readLine() ) != null ) { line = line.trim(); if ( ForesterUtil.isEmpty( line ) || ( use_comment && line.startsWith( start_of_comment_line ) ) ) { continue; } else if ( use_label && line.startsWith( start_of_label_line ) ) { final Matcher matcher = label_matcher.matcher( line ); if ( matcher.matches() ) { if ( !ForesterUtil.isEmpty( label ) ) { results.put( label, current_list ); current_list = new ArrayList(); } label = matcher.group( 1 ); } } else { final String[] s = line.split( "\\s+" ); final GoId id = new GoId( s[ 0 ] ); current_list.add( id ); } } if ( ForesterUtil.isEmpty( label ) ) { label = ""; } results.put( label, current_list ); reader.close(); return results; } }