X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Fseqdb%2FUniProtEntry.java;h=2565339b4db63a234979f79071f6bc5fed8a04d8;hb=90c7930fef3df43e214e4660f999174ba3164680;hp=05e2e59e00bd995bec036f858bef46a15acd96ee;hpb=45f26280b2ab3b14a640c942bc92a8f6caab4519;p=jalview.git diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 05e2e59..2565339 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -26,15 +26,22 @@ package org.forester.ws.seqdb; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.forester.go.BasicGoTerm; +import org.forester.go.GoTerm; import org.forester.util.ForesterUtil; public final class UniProtEntry implements SequenceDatabaseEntry { - private String _ac; - private String _name; - private String _os_scientific_name; - private String _tax_id; + public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+GO:(\\d+);\\s+([PF]):([^;]+);" ); + private String _ac; + private String _name; + private String _symbol; + private String _gene_name; + private String _os_scientific_name; + private String _tax_id; private UniProtEntry() { } @@ -47,6 +54,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { final UniProtEntry e = new UniProtEntry(); for( final String line : lines ) { + //System.out.println( line ); if ( line.startsWith( "AC" ) ) { e.setAc( DatabaseTools.extract( line, "AC", ";" ) ); } @@ -58,6 +66,31 @@ public final class UniProtEntry implements SequenceDatabaseEntry { e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) ); } } + else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) { + if ( line.indexOf( "Short=" ) > 0 ) { + e.setSequenceSymbol( DatabaseTools.extract( line, "Short=", ";" ) ); + } + } + else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) { + if ( line.indexOf( "Name=" ) > 0 ) { + e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) ); + } + } + else if ( line.startsWith( "DR" ) ) { + if ( line.indexOf( "GO;" ) > 0 ) { + Matcher m = GO_PATTERN.matcher( line ); + if ( m.find() ) { + String n = m.group( 1 ); + String ns_str = m.group( 2 ); + String desc = m.group( 3 ); + if ( ns_str.equals( "F" ) ) { + + System.out.println( "GO:" + n + " " + desc + " " + ns ); + GoTerm go = new BasicGoTerm( n, desc, ns, false ); + // e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) ); + } + } + } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) ); @@ -75,6 +108,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry { return e; } + private void setSequenceSymbol( String symbol ) { + _symbol = symbol; + } + @Override public String getAccession() { return _ac; @@ -119,20 +156,32 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } } + private void setGeneName( final String gene_name ) { + if ( _gene_name == null ) { + _gene_name = gene_name; + } + } + @Override public String getSequenceSymbol() { - return ""; + return _symbol; } @Override public boolean isEmpty() { return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) - && ForesterUtil.isEmpty( getTaxonomyScientificName() ) - && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) ); + && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) + && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil + .isEmpty( getSequenceSymbol() ) ); } @Override public String getProvider() { return "uniprot"; } + + @Override + public String getGeneName() { + return _gene_name; + } }