2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.go;
28 import java.io.BufferedReader;
30 import java.io.FileReader;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.HashSet;
34 import java.util.List;
37 import org.forester.util.ForesterUtil;
39 public class OBOparser {
41 private final File _input_file; ;
42 private final ReturnType _return_type;
43 private int _go_term_count;
/**
 * Creates a parser for the given OBO file.
 * <p>
 * The requested return type is examined in a switch before the two final
 * fields are assigned; the throw below rejects a value the switch does not
 * accept (presumably from its default branch -- confirm).
 * Because ReturnType is an enum, a null return_type makes the switch itself
 * fail with a NullPointerException.
 *
 * @param input_file  file to parse; it is only stored here, the readability
 *                    check happens later in parse()
 * @param return_type selects which GoTerm implementation createNewGoTerm builds
 * @throws IllegalArgumentException with message "unknown return type: ..."
 *                                  for a return type that is not supported
 */
45 public OBOparser( final File input_file, final ReturnType return_type ) {
46 switch ( return_type ) {
50 throw new IllegalArgumentException( "unknown return type: " + return_type );
52 _input_file = input_file;
53 _return_type = return_type;
/**
 * Builds a BasicGoTerm from the raw values collected for one term and copies
 * the accompanying collections into it.
 * <p>
 * The term is constructed from id, name, namespace and an obsolete flag; the
 * comment and definition are applied afterwards through setters.
 *
 * @param id               identifier passed to the BasicGoTerm constructor
 * @param namespace        namespace passed to the BasicGoTerm constructor
 * @param is_obsolete      raw flag text; counts as true only if, after trim()
 *                         and toLowerCase(), it equals "true". Must not be
 *                         null, since trim() is called on it directly
 * @param definition       text handed to setDefinition
 * @param alt_ids          alternative ids as strings; each one is wrapped in a
 *                         new GoId before being added
 * @param go_xrefs         cross references, added one by one
 * @param super_go_ids     parent ids, added one by one
 * @param go_relationships relationships, added one by one
 * @param go_subsets       subsets, added one by one
 */
57 private GoTerm createNewBasicGoTerm( final String id,
59 final String namespace,
60 final String is_obsolete,
62 final String definition,
63 final Set<String> alt_ids,
64 final List<GoXRef> go_xrefs,
65 final List<GoId> super_go_ids,
66 final List<GoRelationship> go_relationships,
67 final List<GoSubset> go_subsets ) {
68 final GoTerm gt = new BasicGoTerm( id, name, namespace, is_obsolete.trim().toLowerCase().equals( "true" ) );
// gt is declared as GoTerm, so the two setters are reached through a cast.
// NOTE(review): declaring gt as BasicGoTerm would make both casts unnecessary.
69 ( ( BasicGoTerm ) gt ).setComment( comment );
70 ( ( BasicGoTerm ) gt ).setDefinition( definition );
// Copy every supplied collection into the collections exposed by the term.
71 for( final GoXRef x : go_xrefs ) {
72 gt.getGoXRefs().add( x );
74 for( final GoId s : super_go_ids ) {
75 gt.getSuperGoIds().add( s );
77 for( final GoRelationship r : go_relationships ) {
78 gt.getGoRelationships().add( r );
80 for( final GoSubset sub : go_subsets ) {
81 gt.getGoSubsets().add( sub );
// Alternative ids arrive as plain strings and are converted to GoId here.
83 for( final String alt_id : alt_ids ) {
84 gt.getAltIds().add( new GoId( alt_id ) );
/**
 * Creates one GO term from the values collected for a term stanza, choosing
 * the implementation by switching on getReturnType().
 * <p>
 * The branch shown delegates to createNewBasicGoTerm. A return type the
 * switch does not handle ends in an AssertionError, which should be
 * unreachable if the constructor already rejected unsupported values.
 *
 * @param go_terms         list of terms being built by parse(); presumably the
 *                         new term is appended to it -- confirm
 * @param namespace        namespace text of the term
 * @param is_obsolete      raw "is_obsolete" text of the term
 * @param definition       definition text of the term
 * @param alt_ids          alternative ids of the term
 * @param go_xrefs         cross references of the term
 * @param super_go_ids     parent ids collected from "is_a:" lines
 * @param go_relationships relationships of the term
 * @param go_subsets       subsets of the term
 */
90 private void createNewGoTerm( final List<GoTerm> go_terms,
93 final String namespace,
94 final String is_obsolete,
96 final String definition,
97 final Set<String> alt_ids,
98 final List<GoXRef> go_xrefs,
99 final List<GoId> super_go_ids,
100 final List<GoRelationship> go_relationships,
101 final List<GoSubset> go_subsets ) {
103 switch ( getReturnType() ) {
105 gt = createNewBasicGoTerm( id,
118 throw new AssertionError( "unknown return type: " + getReturnType() );
/**
 * Returns the current value of the GO term counter field.
 *
 * @return the value last stored through setGoTermCount(); presumably the
 *         number of terms produced by the most recent parse() -- confirm
 */
123 public int getGoTermCount() {
124 return _go_term_count;
// Private accessor for the file to read; parse() uses it both for the
// readability check and for opening the FileReader.
// NOTE(review): presumably returns _input_file unchanged -- confirm.
127 private File getInputFile() {
// Private accessor for the requested return type; createNewGoTerm() switches
// on it and includes it in its error message.
// NOTE(review): presumably returns _return_type unchanged -- confirm.
131 private ReturnType getReturnType() {
// Private initialisation hook taking no arguments.
// NOTE(review): presumably resets the GO term counter -- confirm what state
// it touches and where it is called from.
135 private void init() {
/**
 * Reads the input file line by line and turns its "[Term]" stanzas into
 * GoTerm objects collected in a list.
 * <p>
 * A "[Term]" line starts a new stanza: if an id has been collected for the
 * previous stanza, that term is created first, then the per-term
 * accumulators are replaced with fresh collections. While in_term is set,
 * the tags id, name, namespace, alt_id, def, is_obsolete, comment, xref,
 * is_a, relationship and subset are recognised by their line prefix; the
 * value is the text after the tag, trimmed. After the loop a still pending
 * term (non-empty id) is created as well.
 *
 * @return the list the parsed terms are collected in
 * @throws IOException if ForesterUtil.isReadableFile reports a problem with
 *                     the input file, or -- as "parsing problem: ... [at line
 *                     N]" -- if any exception is caught by the catch clause
 */
139 public List<GoTerm> parse() throws IOException {
// A non-empty string from isReadableFile is an error description.
140 final String error = ForesterUtil.isReadableFile( getInputFile() );
141 if ( !ForesterUtil.isEmpty( error ) ) {
142 throw new IOException( error );
// NOTE(review): no close() of this reader is visible; a try-with-resources
// (or finally block) would release the file handle on every path. FileReader
// also decodes with the platform default charset.
144 final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
146 final List<GoTerm> go_terms = new ArrayList<GoTerm>();
// Set-up of the per-term state; tag lines are only honoured while in_term is true.
148 boolean in_term = false;
151 String namespace = "";
154 String is_obsolete = "";
155 HashSet<String> alt_ids = new HashSet<String>();
156 List<GoId> super_go_ids = new ArrayList<GoId>();
157 List<GoXRef> go_xrefs = new ArrayList<GoXRef>();
158 List<GoRelationship> go_relationships = new ArrayList<GoRelationship>();
159 List<GoSubset> go_subsets = new ArrayList<GoSubset>();
161 while ( ( line = br.readLine() ) != null ) {
// Empty lines get their own branch ahead of all tag handling.
164 if ( line.length() < 1 ) {
// Start of a new term stanza.
169 else if ( line.startsWith( "[Term]" ) ) {
// A non-empty id means a previous term is pending and must be created now.
171 if ( id.length() > 0 ) {
172 createNewGoTerm( go_terms,
// Fresh collections for the next term, so the term just created keeps its own.
188 alt_ids = new HashSet<String>();
192 super_go_ids = new ArrayList<GoId>();
193 go_xrefs = new ArrayList<GoXRef>();
194 go_relationships = new ArrayList<GoRelationship>();
195 go_subsets = new ArrayList<GoSubset>();
// Tag handlers: each substring offset equals the length of the tag it follows.
197 else if ( in_term && line.startsWith( "id:" ) ) {
198 id = line.substring( 3 ).trim();
200 else if ( in_term && line.startsWith( "name:" ) ) {
201 name = line.substring( 5 ).trim();
203 else if ( in_term && line.startsWith( "namespace:" ) ) {
204 namespace = line.substring( 10 ).trim();
206 else if ( in_term && line.startsWith( "alt_id:" ) ) {
207 alt_ids.add( line.substring( 7 ).trim() );
209 else if ( in_term && line.startsWith( "def:" ) ) {
210 def = line.substring( 4 ).trim();
212 else if ( in_term && line.startsWith( "is_obsolete:" ) ) {
213 is_obsolete = line.substring( 12 ).trim();
215 else if ( in_term && line.startsWith( "comment:" ) ) {
216 comment = line.substring( 8 ).trim();
// xref, is_a and relationship values pass through trimOffComment() before use.
218 else if ( in_term && line.startsWith( "xref:" ) ) {
219 final String s = trimOffComment( line.substring( 5 ).trim() );
220 go_xrefs.add( new BasicGoXRef( s ) );
222 else if ( in_term && line.startsWith( "is_a:" ) ) {
223 final String s = trimOffComment( line.substring( 5 ).trim() );
224 super_go_ids.add( new GoId( s ) );
226 else if ( in_term && line.startsWith( "relationship:" ) ) {
227 final String s = trimOffComment( line.substring( 13 ).trim() );
228 go_relationships.add( new BasicGoRelationship( s ) );
// NOTE(review): "subset:" is 7 characters long, yet the offset used is 8,
// unlike every other tag. With "subset: x" the skipped character is the blank,
// but "subset:x" loses its first character and a bare "subset:" line makes
// substring( 8 ) throw (surfacing as "parsing problem"). Offset 7 looks intended.
230 else if ( in_term && line.startsWith( "subset:" ) ) {
231 final String s = line.substring( 8 ).trim();
232 go_subsets.add( new BasicGoSubset( s ) );
234 } // while ( ( line = br.readLine() ) != null )
// Every exception type is converted to IOException, tagged with the line number.
// NOTE(review): only e.getMessage() is kept; passing e as the cause
// (new IOException( msg, e )) would preserve the original stack trace.
236 catch ( final Exception e ) {
237 throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" );
// The last stanza has no following "[Term]" line, so it is created here.
239 if ( id.length() > 0 ) {
240 createNewGoTerm( go_terms,
/**
 * Stores the given value in the GO term counter field.
 *
 * @param go_term_count value subsequently reported by getGoTermCount()
 */
256 private void setGoTermCount( final int go_term_count ) {
257 _go_term_count = go_term_count;
/**
 * Strips a comment introduced by '!' from a tag value: the text before the
 * first '!' is kept and trimmed.
 *
 * @param xref tag value that may carry a "! ..." comment; must not be null
 * @return presumably the shortened (or unchanged) value -- confirm against
 *         the return statement
 */
260 private String trimOffComment( String xref ) {
261 final int i = xref.indexOf( '!' );
// NOTE(review): the position of '!' is already held in i; the second
// indexOf call repeats the same search and could be replaced by i.
263 xref = xref.substring( 0, xref.indexOf( '!' ) ).trim();
268 public static enum ReturnType {