package org.forester.ws.seqdb;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.forester.go.BasicGoTerm;
+import org.forester.go.GoTerm;
import org.forester.util.ForesterUtil;
public final class UniProtEntry implements SequenceDatabaseEntry {
- private String _ac;
- private String _name;
- private String _os_scientific_name;
- private String _tax_id;
+ // Matches a "GO; GO:<digits>; <letter>:<text>;" fragment of a line.
+ // Group 1 is the digit string, group 2 the single letter (P or F), group 3 the text up to the next semicolon.
+ // NOTE(review): only the letters P and F are accepted; confirm whether other aspect letters (e.g. C) should match too.
+ public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+GO:(\\d+);\\s+([PF]):([^;]+);" );
+ private String _ac;
+ private String _name;
+ // Filled from the "Short=" value of a "DE" line (see createInstanceFromPlainText).
+ private String _symbol;
+ // Filled from the "Name=" value of a "GN" line (see createInstanceFromPlainText).
+ private String _gene_name;
+ private String _os_scientific_name;
+ private String _tax_id;
/**
 * Private constructor; createInstanceFromPlainText creates the instances.
 */
private UniProtEntry() {
    // Intentionally empty: fields are populated through the private setters.
}
+ /**
+  * Builds a UniProtEntry from the lines of a plain-text record. Each line is
+  * dispatched on its leading tag; the branches visible here handle "AC",
+  * "DE", "GN", "DR" and "OS".
+  *
+  * @param lines the record, one element per line
+  * @return the entry populated from the recognised lines
+  */
public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
final UniProtEntry e = new UniProtEntry();
for( final String line : lines ) {
+ //System.out.println( line );
if ( line.startsWith( "AC" ) ) {
e.setAc( DatabaseTools.extract( line, "AC", ";" ) );
}
e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) );
}
}
+ // Only the first "Short=" value is kept: this branch is skipped once a symbol has been stored.
+ else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) {
+ if ( line.indexOf( "Short=" ) > 0 ) {
+ e.setSequenceSymbol( DatabaseTools.extract( line, "Short=", ";" ) );
+ }
+ }
+ // Only the first "Name=" value is kept: this branch is skipped once a gene name has been stored.
+ else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) {
+ if ( line.indexOf( "Name=" ) > 0 ) {
+ e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+ }
+ }
+ // "DR" lines containing "GO;" are parsed with GO_PATTERN.
+ else if ( line.startsWith( "DR" ) ) {
+ if ( line.indexOf( "GO;" ) > 0 ) {
+ Matcher m = GO_PATTERN.matcher( line );
+ if ( m.find() ) {
+ String n = m.group( 1 );
+ String ns_str = m.group( 2 );
+ String desc = m.group( 3 );
+ // NOTE(review): only "F" is handled; lines whose letter is "P" match GO_PATTERN but are ignored here.
+ if ( ns_str.equals( "F" ) ) {
+
+ // NOTE(review): "ns" is not declared in the visible code (the local is "ns_str") - confirm this compiles.
+ // NOTE(review): the println looks like leftover debug output, and "go" is not used afterwards in the visible lines.
+ System.out.println( "GO:" + n + " " + desc + " " + ns );
+ GoTerm go = new BasicGoTerm( n, desc, ns, false );
+ // e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+ }
+ }
+ }
+ // NOTE(review): in the visible lines the "DR" branch opens four blocks but closes only three before this "else if" - confirm brace balance.
else if ( line.startsWith( "OS" ) ) {
if ( line.indexOf( "(" ) > 0 ) {
e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) );
return e;
}
+ /**
+  * Stores the given sequence symbol, replacing any value stored earlier.
+  *
+  * @param symbol the symbol to store
+  */
+ private void setSequenceSymbol( String symbol ) {
+     this._symbol = symbol;
+ }
+
/**
 * Returns the accession held in the _ac field.
 */
@Override
public String getAccession() {
    return this._ac;
}
}
+ /**
+  * Stores the gene name only while none is stored yet; once _gene_name is
+  * non-null, later calls leave it untouched.
+  *
+  * @param gene_name the gene name to store
+  */
+ private void setGeneName( final String gene_name ) {
+     if ( _gene_name != null ) {
+         // A name is already stored; keep the first one.
+         return;
+     }
+     _gene_name = gene_name;
+ }
+
+ /**
+  * Returns the symbol held in _symbol; this is null if no symbol was ever stored.
+  */
@Override
public String getSequenceSymbol() {
- return "";
+ return _symbol;
}
@Override
public boolean isEmpty() {
return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
- && ForesterUtil.isEmpty( getTaxonomyScientificName() )
- && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) );
+ && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() )
+ && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil
+ .isEmpty( getSequenceSymbol() ) );
}
/**
 * Returns the provider name for this entry, the fixed string "uniprot".
 */
@Override
public String getProvider() {
    final String provider = "uniprot";
    return provider;
}
+
+ /**
+  * Returns the gene name held in _gene_name; this is null if none was ever stored.
+  */
+ @Override
+ public String getGeneName() {
+     return this._gene_name;
+ }
}