// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.go; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.forester.util.ForesterUtil; public class OBOparser { private final File _input_file; ; private final ReturnType _return_type; private int _go_term_count; public OBOparser( final File input_file, final ReturnType return_type ) { switch ( return_type ) { case BASIC_GO_TERM: break; default: throw new IllegalArgumentException( "unknown return type: " + return_type ); } _input_file = input_file; _return_type = return_type; init(); } private GoTerm createNewBasicGoTerm( final String id, final String name, final String namespace, final String is_obsolete, final String comment, final String definition, final Set alt_ids, final List go_xrefs, final List super_go_ids, final List go_relationships, final List go_subsets ) { final GoTerm gt = new BasicGoTerm( id, name, namespace, is_obsolete.trim().toLowerCase().equals( "true" ) ); ( ( BasicGoTerm ) gt ).setComment( comment ); ( ( BasicGoTerm ) gt ).setDefinition( definition ); for( final GoXRef x : go_xrefs ) { gt.getGoXRefs().add( x ); } for( final GoId s : super_go_ids ) { gt.getSuperGoIds().add( s ); } for( final GoRelationship r : go_relationships ) { gt.getGoRelationships().add( r ); } for( final GoSubset sub : go_subsets ) { gt.getGoSubsets().add( sub ); } for( final String alt_id : alt_ids ) { gt.getAltIds().add( new GoId( alt_id ) ); } ++_go_term_count; return gt; } private void createNewGoTerm( final List go_terms, final String id, final String name, final String namespace, final String is_obsolete, final String comment, final String definition, final Set alt_ids, final List go_xrefs, final List super_go_ids, final List go_relationships, final List go_subsets ) { GoTerm gt; switch ( getReturnType() ) { case BASIC_GO_TERM: gt = createNewBasicGoTerm( id, name, namespace, is_obsolete, comment, definition, alt_ids, go_xrefs, super_go_ids, go_relationships, go_subsets ); break; default: throw new AssertionError( "unknown return type: " + getReturnType() ); } go_terms.add( gt ); } public int getGoTermCount() { return _go_term_count; } private File getInputFile() { return _input_file; } private ReturnType getReturnType() { return _return_type; } private void init() { setGoTermCount( 0 ); } public List parse() throws IOException { final String error = ForesterUtil.isReadableFile( getInputFile() ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); } final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) ); String line; final List go_terms = new ArrayList(); int line_number = 0; boolean in_term = false; String id = ""; String name = ""; String namespace = ""; String def = ""; String comment = ""; String is_obsolete = ""; HashSet alt_ids = new HashSet(); List super_go_ids = new ArrayList(); List go_xrefs = new ArrayList(); List go_relationships = new ArrayList(); List go_subsets = new ArrayList(); try { while ( ( line = br.readLine() ) != null ) { line_number++; line = line.trim(); if ( line.length() < 1 ) { if ( in_term ) { in_term = false; } } else if ( line.startsWith( "[Term]" ) ) { in_term = true; if ( id.length() > 0 ) { createNewGoTerm( go_terms, id, name, namespace, is_obsolete, comment, def, alt_ids, go_xrefs, super_go_ids, go_relationships, go_subsets ); } id = ""; name = ""; namespace = ""; alt_ids = new HashSet(); def = ""; comment = ""; is_obsolete = ""; super_go_ids = new ArrayList(); go_xrefs = new ArrayList(); go_relationships = new ArrayList(); go_subsets = new ArrayList(); } else if ( in_term && line.startsWith( "id:" ) ) { id = line.substring( 3 ).trim(); } else if ( in_term && line.startsWith( "name:" ) ) { name = line.substring( 5 ).trim(); } else if ( in_term && line.startsWith( "namespace:" ) ) { namespace = line.substring( 10 ).trim(); } else if ( in_term && line.startsWith( "alt_id:" ) ) { alt_ids.add( line.substring( 7 ).trim() ); } else if ( in_term && line.startsWith( "def:" ) ) { def = line.substring( 4 ).trim(); } else if ( in_term && line.startsWith( "is_obsolete:" ) ) { is_obsolete = line.substring( 12 ).trim(); } else if ( in_term && line.startsWith( "comment:" ) ) { comment = line.substring( 8 ).trim(); } else if ( in_term && line.startsWith( "xref:" ) ) { final String s = trimOffComment( line.substring( 5 ).trim() ); go_xrefs.add( new BasicGoXRef( s ) ); } else if ( in_term && line.startsWith( "is_a:" ) ) { final String s = trimOffComment( line.substring( 5 ).trim() ); super_go_ids.add( new GoId( s ) ); } else if ( in_term && line.startsWith( "relationship:" ) ) { final String s = trimOffComment( line.substring( 13 ).trim() ); go_relationships.add( new BasicGoRelationship( s ) ); } else if ( in_term && line.startsWith( "subset:" ) ) { final String s = line.substring( 8 ).trim(); go_subsets.add( new BasicGoSubset( s ) ); } } // while ( ( line = br.readLine() ) != null ) } catch ( final Exception e ) { throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" ); } if ( id.length() > 0 ) { createNewGoTerm( go_terms, id, name, namespace, is_obsolete, comment, def, alt_ids, go_xrefs, super_go_ids, go_relationships, go_subsets ); } return go_terms; } private void setGoTermCount( final int go_term_count ) { _go_term_count = go_term_count; } private String trimOffComment( String xref ) { final int i = xref.indexOf( '!' ); if ( i > 0 ) { xref = xref.substring( 0, xref.indexOf( '!' ) ).trim(); } return xref; } public static enum ReturnType { BASIC_GO_TERM } }