2 package org.forester.go;
4 import java.io.BufferedReader;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.util.ArrayList;
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
13 import org.forester.surfacing.DomainId;
14 import org.forester.util.ForesterUtil;
16 public class PfamToGoParser {
// Sample input line; both patterns below match it in full:
18 // Pfam:PF00001 7tm_1 > GO:rhodopsin-like receptor activity ; GO:0001584
// Name-based pattern: group 1 is the second token after "Pfam:" ("7tm_1" in the
// sample), group 2 is the token following the ';' ("GO:0001584" in the sample).
19 private static final String PFAM_TO_GO_FORMAT = "Pfam:\\S+\\s+(\\S+)\\s*>\\s*GO:.+;\\s*(\\S+)";
// Compiled once and reused for every line.
20 private static final Pattern PFAM_TO_GO_PATTERN = Pattern.compile( PFAM_TO_GO_FORMAT );
// Accession-based pattern: group 1 is the token directly after "Pfam:" ("PF00001"
// in the sample), group 2 is the token following the ';' ("GO:0001584" in the sample).
21 private static final String PFAMACC_TO_GO_FORMAT = "Pfam:(\\S+)\\s+\\S+\\s*>\\s*GO:.+;\\s*(\\S+)";
// Compiled once and reused for every line.
22 private static final Pattern PFAMACC_TO_GO_PATTERN = Pattern.compile( PFAMACC_TO_GO_FORMAT );
// File to parse; assigned in the constructor.
23 private final File _input_file;
// Value exposed by getMappingCount() and written by setMappingCount(int).
24 private int _mapping_count;
// NOTE(review): presumably the flag behind isUseAccessors()/setUseAccessors(boolean);
// their bodies are not visible in this view - confirm.
25 private boolean _use_acc;
/**
 * Creates a parser for the given file.
 *
 * @param input_file the file to parse; stored in {@code _input_file}
 */
27 public PfamToGoParser( final File input_file ) {
28 _input_file = input_file;
// NOTE(review): the remainder of the constructor body is not visible in this view.
// Accessor used by parse() to obtain the file to read.
// NOTE(review): body not visible in this view; presumably returns _input_file - confirm.
32 private File getInputFile() {
/**
 * Returns the current value of {@code _mapping_count}.
 * NOTE(review): presumably the number of mappings produced by parse(); the place
 * where the counter is updated is not visible in this view - confirm.
 *
 * @return the stored mapping count
 */
36 public int getMappingCount() {
37 return _mapping_count;
42 setUseAccessors( false );
// Flag consulted by parse(): when it returns true, lines are matched with
// PFAMACC_TO_GO_PATTERN.
// NOTE(review): body not visible in this view; presumably returns _use_acc - confirm.
45 public boolean isUseAccessors() {
/**
 * Reads the input file line by line and turns mapping lines into
 * {@code PfamToGoMapping} objects.
 * Only lines that are non-empty and do not start with "!" are processed. Each such
 * line is matched against PFAMACC_TO_GO_PATTERN when isUseAccessors() is true
 * (PFAM_TO_GO_PATTERN is the other pattern applied); group 1 is wrapped in a
 * {@code DomainId} and group 2 in a {@code GoId}.
 *
 * NOTE(review): several lines of this method (the try, the line counter, how each
 * mapping is stored, the return statement) are not visible in this view; the
 * comments below describe only what is shown.
 *
 * @return presumably the {@code mappings} list built here - the return statement is
 *         not visible, confirm
 * @throws IOException if ForesterUtil.isReadableFile reports an error for the input
 *         file, or with a "parsing problem: ..." message for an exception caught
 *         by the catch clause at the end of the method
 */
49 public List<PfamToGoMapping> parse() throws IOException {
// isReadableFile hands back a message string; a non-empty message aborts parsing.
50 final String error = ForesterUtil.isReadableFile( getInputFile() );
51 if ( !ForesterUtil.isEmpty( error ) ) {
52 throw new IOException( error );
// NOTE(review): no close() of this reader is visible in this view - confirm it is
// released on every path (try-with-resources or finally). FileReader also decodes
// with the platform default charset on Java versions before 18.
54 final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
56 final List<PfamToGoMapping> mappings = new ArrayList<PfamToGoMapping>();
59 while ( ( line = br.readLine() ) != null ) {
// Only non-empty lines that do not begin with "!" are treated as mapping lines.
62 if ( ( line.length() > 0 ) && !line.startsWith( "!" ) ) {
// Pick the regex according to the accession flag.
64 if ( isUseAccessors() ) {
65 m = PFAMACC_TO_GO_PATTERN.matcher( line );
68 m = PFAM_TO_GO_PATTERN.matcher( line );
// NOTE(review): the condition guarding this throw is not visible in this view.
71 throw new IOException( "unexpected format [\"" + line + "\"]" );
// NOTE(review): Matcher.groupCount() reports the number of capturing groups in the
// pattern, not in the current match; both patterns declare exactly two groups, so
// this condition cannot be true for them.
73 if ( m.groupCount() != 2 ) {
74 throw new IOException( "unexpected format [\"" + line + "\"]" );
76 final String pfam = m.group( 1 );
77 final String go = m.group( 2 );
// Reject lines where either captured token is empty.
78 if ( ForesterUtil.isEmpty( pfam ) || ForesterUtil.isEmpty( go ) ) {
79 throw new IOException( "unexpected format [\"" + line + "\"]" );
// Group 1 becomes the DomainId, group 2 the GoId.
81 final PfamToGoMapping map = new PfamToGoMapping( new DomainId( pfam ), new GoId( go ) );
85 } // while ( ( line = br.readLine() ) != null )
// Every caught exception is replaced by a new IOException that carries only the
// message text plus line_number (declared on a line not visible here).
// NOTE(review): the caught exception is not passed along as the cause, so its
// stack trace is lost; if the "unexpected format" throws above sit inside the
// matching try, they are re-wrapped here as well - confirm.
87 catch ( final Exception e ) {
88 throw new IOException( "parsing problem: " + e.getMessage() + " [at line " + line_number + "]" );
/**
 * Overwrites {@code _mapping_count} with the given value.
 *
 * @param mapping_count the new value of the counter
 */
93 private void setMappingCount( final int mapping_count ) {
94 _mapping_count = mapping_count;
// Setter paired with isUseAccessors(), the flag parse() tests before matching with
// PFAMACC_TO_GO_PATTERN.
// NOTE(review): body not visible in this view; presumably assigns use_ids to
// _use_acc - confirm. The parameter is named use_ids although the flag it appears
// to control selects the accession pattern.
97 public void setUseAccessors( final boolean use_ids ) {