public class GoNameSpace {
- public final String MOLECULAR_FUNCTION_STR = "molecular_function";
- public final String BIOLOGICAL_PROCESS_STR = "biological_process";
- public final String CELLULAR_COMPONENT_STR = "cellular_component";
- public final String UNASSIGNED_STR = "unassigned";
+ public final static String MOLECULAR_FUNCTION_STR = "molecular_function";
+ public final static String BIOLOGICAL_PROCESS_STR = "biological_process";
+ public final static String CELLULAR_COMPONENT_STR = "cellular_component";
+ public final static String UNASSIGNED_STR = "unassigned";
private final GoNamespaceType _type;
public GoNameSpace( final GoNamespaceType type ) {
import java.util.SortedSet;
import java.util.TreeSet;
+import org.forester.go.GoTerm;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
}
}
if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
- seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) );
+ // seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) );
}
+ if ( db_entry.getGoTerms() != null && !db_entry.getGoTerms().isEmpty() ) {
+ for( final GoTerm go : db_entry.getGoTerms() ) {
+ seq.addAnnotation( new Annotation( go.getGoId().getId(), go.getName() ) );
+ }
+ }
+
final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy()
: new Taxonomy();
if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
package org.forester.ws.seqdb;
+import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.go.BasicGoTerm;
+import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.util.ForesterUtil;
public final class UniProtEntry implements SequenceDatabaseEntry {
- public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+GO:(\\d+);\\s+([PF]):([^;]+);" );
+ public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PF]):([^;]+);" );
private String _ac;
private String _name;
private String _symbol;
private String _gene_name;
private String _os_scientific_name;
private String _tax_id;
+ private List<GoTerm> _go_terms;
private UniProtEntry() {
}
if ( line.indexOf( "GO;" ) > 0 ) {
Matcher m = GO_PATTERN.matcher( line );
if ( m.find() ) {
- String n = m.group( 1 );
+ String id = m.group( 1 );
String ns_str = m.group( 2 );
String desc = m.group( 3 );
+ String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR;
if ( ns_str.equals( "F" ) ) {
+ gns = GoNameSpace.MOLECULAR_FUNCTION_STR;
+ }
- System.out.println( "GO:" + n + " " + desc + " " + ns );
- GoTerm go = new BasicGoTerm( n, desc, ns, false );
- // e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+ System.out.println( "GO:" + id + " " + desc + " " + ns_str );
+
+ e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) );
}
}
}
return e;
}
+ /**
+  * Appends a GO term to this entry's term list, creating the list the first
+  * time a term is added.
+  *
+  * @param g the GO term to append
+  */
+ private void addGoTerm( final BasicGoTerm g ) {
+     List<GoTerm> terms = _go_terms;
+     if ( terms == null ) {
+         // First term for this entry: allocate the backing list now.
+         terms = new ArrayList<GoTerm>();
+         _go_terms = terms;
+     }
+     terms.add( g );
+ }
+
// Stores the given sequence symbol in the _symbol field.
private void setSequenceSymbol( String symbol ) {
_symbol = symbol;
}
_gene_name = gene_name;
}
}
+
+ /**
+  * Returns the GO terms collected for this entry.
+  * <p>
+  * Never returns {@code null}: when no term has been added, a new empty list
+  * is returned instead. That empty list is not retained by this entry, so
+  * changes made to it are not reflected in later calls.
+  *
+  * @return the stored GO term list, or a fresh empty list if none was added
+  */
+ @Override
+ public List<GoTerm> getGoTerms() {
+     if ( _go_terms == null ) {
+         // Lets callers iterate or call isEmpty() without a null guard.
+         return new ArrayList<GoTerm>();
+     }
+     return _go_terms;
+ }
+
@Override
public String getSequenceSymbol() {
return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
&& ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() )
&& ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil
- .isEmpty( getSequenceSymbol() ) );
+ .isEmpty( getSequenceSymbol() ) && ( getGoTerms() == null || getGoTerms().isEmpty() ) );
}
@Override