final Pattern mim_PATTERN = Pattern.compile( "\\s+/db_xref=\"MIM:(\\d+)\"" );
final Pattern taxon_PATTERN = Pattern.compile( "\\s+/db_xref=\"taxon:(\\d+)\"" );
final Pattern interpro_PATTERN = Pattern.compile( "\\s+/db_xref=\"InterPro:([A-Z0-9]+)\"" );
- final Pattern uniprot_PATTERN = Pattern.compile( "\\s+/db_xref=\"UniProtKB/TrEMBL:(\\w+)\"" );
- final Pattern hgnc_PATTERN = Pattern.compile( "\\s+/db_xref=\"HGNC:(\\d+)\"" );
+ final Pattern uniprot_PATTERN = Pattern.compile( "\\s+/db_xref=\"UniProtKB/[A-Za-z-]*:(\\w+)\"" );
+ final Pattern hgnc_PATTERN = Pattern.compile( "\\s+/db_xref=\"[A-Z:]*HGNC:(\\d+)\"" );
final Pattern geneid_PATTERN = Pattern.compile( "\\s+/db_xref=\"GeneID:(\\d+)\"" );
+ final Pattern pdb_PATTERN = Pattern.compile( "\\s+/db_xref=\"PDB:([A-Z0-9]+)\"" );
final Pattern ec_PATTERN = Pattern.compile( "\\s+/EC_number=\"([\\.\\-\\d]+)\"" );
final Pattern product_PATTERN = Pattern.compile( "\\s+/product=\"(\\w{1,10})\"" );
final EbiDbEntry e = new EbiDbEntry();
in_protein = false;
}
if ( in_source ) {
- final Matcher m = taxon_PATTERN.matcher( line );
- if ( m.find() ) {
- e.setTaxId( m.group( 1 ) );
+ final Matcher ti = taxon_PATTERN.matcher( line );
+ if ( ti.find() ) {
+ e.setTaxId( ti.group( 1 ) );
+ }
+ final Matcher chr = chromosome_PATTERN.matcher( line );
+ if ( chr.find() ) {
+ e.setChromosome( chr.group( 1 ) );
+ }
+ final Matcher map = map_PATTERN.matcher( line );
+ if ( map.find() ) {
+ e.setMap( map.group( 1 ) );
}
}
if ( in_cds || in_gene ) {
if ( product.find() ) {
e.setSequenceSymbol( product.group( 1 ) );
}
+ final Matcher pdb = pdb_PATTERN.matcher( line );
+ if ( pdb.find() ) {
+ e.addCrossReference( new Accession( pdb.group( 1 ), "pdb" ) );
+ }
}
}
if ( def.length() > 0 ) {
}
return e;
}
+ private String _map;
+ private String _chromosome;
+
+ private void setMap( String map ) { // Stores the given value in the _map field; no validation is done here.
+ _map = map;
+
+ }
+
+ private void setChromosome( String chromosome ) { // Stores the given value in the _chromosome field; no validation is done here.
+ _chromosome = chromosome;
+
+ }
+ @Override
+ public String getMap( ) { // Returns the current value of the _map field; the supertype declaring getMap() is not visible in this fragment.
+ return _map;
+
+ }
+ @Override
+ public String getChromosome() { // Returns the current value of the _chromosome field; the supertype declaring getChromosome() is not visible in this fragment.
+ return _chromosome;
+
+ }
+
+
private static void x( final StringBuilder sb, final String s ) {
if ( sb.length() > 0 ) {
sb.append( " " );
return _symbol;
}
- private void setSequenceSymbol( String symbol ) {
+ private void setSequenceSymbol( final String symbol ) { // Stores the given symbol in the _symbol field.
_symbol = symbol;
}