2 package org.forester.go;
4 import java.io.BufferedReader;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.util.ArrayList;
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
13 import org.forester.util.ForesterUtil;
15 public class PfamToGoParser {
// Example of one mapping line that the two patterns below are written for:
17 // Pfam:PF00001 7tm_1 > GO:rhodopsin-like receptor activity ; GO:0001584
// Name-based form: skips the token directly after "Pfam:" and captures the next
// whitespace-delimited token as group 1 ("7tm_1" in the example); group 2 is the
// non-whitespace token following a ';' ("GO:0001584" in the example).
18 private static final String PFAM_TO_GO_FORMAT = "Pfam:\\S+\\s+(\\S+)\\s*>\\s*GO:.+;\\s*(\\S+)";
// Compiled once and reused for every line.
19 private static final Pattern PFAM_TO_GO_PATTERN = Pattern.compile( PFAM_TO_GO_FORMAT );
// Accession-based form: captures the token directly after "Pfam:" as group 1
// ("PF00001" in the example) and skips the following token; group 2 is the same as above.
20 private static final String PFAMACC_TO_GO_FORMAT = "Pfam:(\\S+)\\s+\\S+\\s*>\\s*GO:.+;\\s*(\\S+)";
// Compiled once and reused for every line.
21 private static final Pattern PFAMACC_TO_GO_PATTERN = Pattern.compile( PFAMACC_TO_GO_FORMAT );
// File handed to the constructor; assigned exactly once (final).
22 private final File _input_file;
// Value reported by getMappingCount() and overwritten by setMappingCount().
23 private int _mapping_count;
// NOTE(review): not referenced in any visible line; presumably backs
// isUseAccessors()/setUseAccessors() -- confirm against the full file.
24 private boolean _use_acc;
/**
 * Creates a parser bound to the given input file by storing the reference in _input_file.
 * NOTE(review): the remainder of the constructor body is not visible in this extract.
 *
 * @param input_file the file this parser will read from
 */
26 public PfamToGoParser( final File input_file ) {
27 _input_file = input_file;
// Supplies the File that parse() passes to the readability check and to the FileReader.
// NOTE(review): body not visible in this extract; presumably returns _input_file -- confirm.
31 private File getInputFile() {
/**
 * Returns the current value of the _mapping_count field.
 *
 * @return the stored mapping count
 */
35 public int getMappingCount() {
36 return _mapping_count;
41 setUseAccessors( false );
// When this returns true, parse() matches lines with PFAMACC_TO_GO_PATTERN instead of
// PFAM_TO_GO_PATTERN.
// NOTE(review): body not visible in this extract; presumably returns _use_acc -- confirm.
44 public boolean isUseAccessors() {
/**
 * Reads the input file line by line and builds a PfamToGoMapping from each accepted line.
 * <p>
 * Only lines that are non-empty and do not start with "!" are processed. Such a line is
 * matched against PFAMACC_TO_GO_PATTERN when isUseAccessors() is true, otherwise against
 * PFAM_TO_GO_PATTERN; capture group 1 is used as the Pfam part and capture group 2 is
 * wrapped in a GoId.
 * <p>
 * NOTE(review): several lines of this method (among them the try statement, the guard in
 * front of the first "unexpected format" throw, what happens to each mapping after it is
 * created, and the return statement) are not visible in this extract.
 *
 * @return NOTE(review): presumably the "mappings" list built here -- the return statement is
 *         not visible, confirm
 * @throws IOException if the readability check yields a non-empty message, if a line has an
 *         unexpected format, or if an exception is caught while parsing (rethrown with the
 *         line number appended to its message)
 */
48 public List<PfamToGoMapping> parse() throws IOException {
// A non-empty result from isReadableFile is treated as an error message and aborts parsing.
49 final String error = ForesterUtil.isReadableFile( getInputFile() );
50 if ( !ForesterUtil.isEmpty( error ) ) {
51 throw new IOException( error );
// NOTE(review): FileReader without an explicit charset uses the platform default on older
// JDKs, and no close() for this reader is visible in this extract -- confirm it is released.
53 final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
55 final List<PfamToGoMapping> mappings = new ArrayList<PfamToGoMapping>();
58 while ( ( line = br.readLine() ) != null ) {
// Process only non-empty lines that do not begin with '!'.
61 if ( ( line.length() > 0 ) && !line.startsWith( "!" ) ) {
// Pick the pattern whose group 1 captures the wanted Pfam token (accession vs. name).
63 if ( isUseAccessors() ) {
64 m = PFAMACC_TO_GO_PATTERN.matcher( line );
67 m = PFAM_TO_GO_PATTERN.matcher( line );
// NOTE(review): the condition guarding this throw is not visible in this extract.
70 throw new IOException( "unexpected format [\"" + line + "\"]" );
// NOTE(review): Matcher.groupCount() reports the number of capturing groups in the pattern,
// and both patterns declare exactly two, so this check looks like it can never fail.
72 if ( m.groupCount() != 2 ) {
73 throw new IOException( "unexpected format [\"" + line + "\"]" );
75 final String pfam = m.group( 1 );
76 final String go = m.group( 2 );
// Reject lines where either captured token is empty according to ForesterUtil.isEmpty.
77 if ( ForesterUtil.isEmpty( pfam ) || ForesterUtil.isEmpty( go ) ) {
78 throw new IOException( "unexpected format [\"" + line + "\"]" );
80 final PfamToGoMapping map = new PfamToGoMapping( pfam, new GoId( go ) );
84 } // while ( ( line = br.readLine() ) != null )
// Rewraps the caught exception as an IOException carrying the line number; only the message
// is copied, the original exception is not chained as the cause.
86 catch ( final Exception e ) {
87 throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" );
/**
 * Overwrites the _mapping_count field with the given value.
 *
 * @param mapping_count the new count to store
 */
92 private void setMappingCount( final int mapping_count ) {
93 _mapping_count = mapping_count;
96 public void setUseAccessors( final boolean use_ids ) {