package org.forester.ws.seqdb;
-import java.util.ArrayList;
import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
import org.forester.util.ForesterUtil;
public final class UniProtEntry implements SequenceDatabaseEntry {
// Matches the text "PharmGKB;" followed by whitespace; group 1 captures the
// identifier (digits and upper-case letters) that precedes the next ';'.
public final static Pattern PharmGKB_PATTERN = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" );
// Matches the text "Reactome;" followed by whitespace; group 1 captures the
// identifier (digits and upper-case letters), group 2 captures the text after
// the following ';' and whitespace, up to but not including the first '.'.
public final static Pattern Reactome_PATTERN = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" );
private String _ac;
// Cross references of this entry. The set is not created here; it is created
// on first use by addCrossReference(). This change replaces the ArrayList
// with a SortedSet.
- private ArrayList<Accession> _cross_references;
+ private SortedSet<Accession> _cross_references;
private String _gene_name;
// GO terms of this entry. The set is not created here; it is created on first
// use by addGoTerm(). This change replaces the List with a SortedSet.
- private List<GoTerm> _go_terms;
+ private SortedSet<GoTerm> _go_terms;
private String _name;
private String _os_scientific_name;
private String _symbol;
}
/**
 * Returns the cross references collected for this entry.
 * <p>
 * The internal set is returned directly (no copy is made). The set is only
 * created by addCrossReference(), so the result may be null when no cross
 * reference was ever added -- NOTE(review): confirm no other initialization
 * exists in code not shown here.
 * <p>
 * This change narrows the return type from List to SortedSet.
 */
@Override
- public List<Accession> getCrossReferences() {
+ public SortedSet<Accession> getCrossReferences() {
return _cross_references;
}
}
/**
 * Returns the GO terms collected for this entry.
 * <p>
 * The internal set is returned directly (no copy is made). The set is only
 * created by addGoTerm(), so the result may be null when no GO term was ever
 * added -- NOTE(review): confirm no other initialization exists in code not
 * shown here.
 * <p>
 * This change narrows the return type from List to SortedSet.
 */
@Override
- public List<GoTerm> getGoTerms() {
+ public SortedSet<GoTerm> getGoTerms() {
return _go_terms;
}
/**
 * Adds one accession to this entry's cross references.
 * <p>
 * With this change the backing collection becomes a TreeSet, so elements are
 * kept in sorted order and an accession that compares equal to one already
 * present is not added again. A TreeSet built without a comparator requires
 * its elements to be comparable -- presumably Accession implements
 * Comparable; verify.
 * <p>
 * The debug print to standard output is removed by this change.
 *
 * @param accession the cross reference to add
 */
private void addCrossReference( final Accession accession ) {
// Create the backing set lazily, the first time a cross reference is added.
if ( _cross_references == null ) {
- _cross_references = new ArrayList<Accession>();
+ _cross_references = new TreeSet<Accession>();
}
- System.out.println( "XREF ADDED: " + accession );
_cross_references.add( accession );
}
/**
 * Adds one GO term to this entry's GO terms.
 * <p>
 * With this change the backing collection becomes a TreeSet, so elements are
 * kept in sorted order and a term that compares equal to one already present
 * is not added again. A TreeSet built without a comparator requires its
 * elements to be comparable -- presumably GoTerm/BasicGoTerm implements
 * Comparable; verify.
 * <p>
 * The debug print to standard output is removed by this change.
 *
 * @param g the GO term to add
 */
private void addGoTerm( final BasicGoTerm g ) {
// Create the backing set lazily, the first time a GO term is added.
if ( _go_terms == null ) {
- _go_terms = new ArrayList<GoTerm>();
+ _go_terms = new TreeSet<GoTerm>();
}
- System.out.println( "GOTERM ADDED: " + g );
_go_terms.add( g );
}
for( final String line : lines ) {
//System.out.println( line );
if ( line.startsWith( "AC" ) ) {
- e.setAc( DatabaseTools.extract( line, "AC", ";" ) );
+ e.setAc( SequenceDbWsTools.extractFromTo( line, "AC", ";" ) );
}
else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceName() ) ) {
if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
- e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) );
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
}
else if ( ( line.indexOf( "SubName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
- e.setSequenceName( DatabaseTools.extract( line, "Full=", ";" ) );
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
}
}
else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) {
if ( line.indexOf( "Short=" ) > 0 ) {
- e.setSequenceSymbol( DatabaseTools.extract( line, "Short=", ";" ) );
+ e.setSequenceSymbol( SequenceDbWsTools.extractFromTo( line, "Short=", ";" ) );
}
}
else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) {
if ( line.indexOf( "Name=" ) > 0 ) {
- e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+ e.setGeneName( SequenceDbWsTools.extractFromTo( line, "Name=", ";" ) );
}
}
else if ( line.startsWith( "DR" ) ) {
else if ( ns_str.equals( "C" ) ) {
gns = GoNameSpace.CELLULAR_COMPONENT_STR;
}
- System.out.println( "GO:" + id + " " + desc + " " + ns_str );
e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) );
}
}
}
else if ( line.startsWith( "OS" ) ) {
if ( line.indexOf( "(" ) > 0 ) {
- e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) );
+ e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
}
else {
- e.setOsScientificName( DatabaseTools.extract( line, "OS", "." ) );
+ e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "." ) );
}
}
else if ( line.startsWith( "OX" ) ) {
if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
- e.setTaxId( DatabaseTools.extract( line, "NCBI_TaxID=", ";" ) );
+ e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
}
}
}
return e;
}
+
// Newly added accessor for annotations. This implementation always returns
// null, so callers must null-check the result.
// NOTE(review): confirm whether the SequenceDatabaseEntry contract expects
// null or an empty set when no annotations are available.
+ @Override
+ public SortedSet<Annotation> getAnnotations() {
+ return null;
+ }
}