From: cmzmasek@gmail.com Date: Fri, 18 Oct 2013 02:56:33 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=6f8480b9e13a9a720cb8dd7434c0b46f2f6ae948;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index f374e01..2a9ca57 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -11051,8 +11051,7 @@ public final class Test { private static boolean testEbiEntryRetrieval() { try { - final SequenceDatabaseEntry entry = SequenceDbWsTools - .obtainEmblEntry( new Accession( "AAK41263", Accession.Source.NCBI ) ); + final SequenceDatabaseEntry entry = SequenceDbWsTools.obtainEntry( "AAK41263" ); if ( !entry.getAccession().equals( "AAK41263" ) ) { System.out.println( entry.getAccession() ); return false; @@ -11078,12 +11077,102 @@ public final class Test { System.out.println( entry.getTaxonomyIdentifier() ); return false; } - if ( !entry.getAnnotations().get( 0 ).getRefValue().equals( "3.2.1.33" ) ) { - System.out.println( entry.getAnnotations().get( 0 ).getRefValue() ); + if ( !entry.getAnnotations().first().getRefValue().equals( "3.2.1.33" ) ) { + System.out.println( entry.getAnnotations().first().getRefValue() ); return false; } - if ( !entry.getAnnotations().get( 0 ).getRefSource().equals( "EC" ) ) { - System.out.println( entry.getAnnotations().get( 0 ).getRefSource() ); + if ( !entry.getAnnotations().first().getRefSource().equals( "EC" ) ) { + System.out.println( entry.getAnnotations().first().getRefSource() ); + return false; + } + if ( entry.getCrossReferences().size() != 5 ) { + return false; + } + // + final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" ); + if ( !entry1.getAccession().equals( "ABJ16409" ) ) { + return false; + } + if ( !entry1.getTaxonomyScientificName().equals( "Felis catus" ) ) { + System.out.println( entry1.getTaxonomyScientificName() ); + return false; + } + if ( !entry1.getSequenceName().equals( "Felis catus (domestic cat) partial BCL2" ) ) { + System.out.println( entry1.getSequenceName() ); + return false; + } + if ( !entry1.getTaxonomyIdentifier().equals( "9685" ) ) { + System.out.println( entry1.getTaxonomyIdentifier() ); + return false; + } + if ( !entry1.getGeneName().equals( "BCL2" ) ) { + System.out.println( entry1.getGeneName() ); + return false; + } + if ( entry1.getCrossReferences().size() != 6 ) { + return false; + } + // + final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" ); + if ( !entry2.getAccession().equals( "NM_184234" ) ) { + return false; + } + if ( !entry2.getTaxonomyScientificName().equals( "Homo sapiens" ) ) { + System.out.println( entry2.getTaxonomyScientificName() ); + return false; + } + if ( !entry2.getSequenceName() + .equals( "Homo sapiens RNA binding motif protein 39 (RBM39), transcript variant 1, mRNA" ) ) { + System.out.println( entry2.getSequenceName() ); + return false; + } + if ( !entry2.getTaxonomyIdentifier().equals( "9606" ) ) { + System.out.println( entry2.getTaxonomyIdentifier() ); + return false; + } + if ( !entry2.getGeneName().equals( "RBM39" ) ) { + System.out.println( entry2.getGeneName() ); + return false; + } + if ( entry2.getCrossReferences().size() != 3 ) { + return false; + } + // + final SequenceDatabaseEntry entry3 = SequenceDbWsTools.obtainEntry( "HM043801" ); + if ( !entry3.getAccession().equals( "HM043801" ) ) { + return false; + } + if ( !entry3.getTaxonomyScientificName().equals( "Bursaphelenchus xylophilus" ) ) { + System.out.println( entry3.getTaxonomyScientificName() ); + return false; + } + if ( !entry3.getSequenceName().equals( "Bursaphelenchus xylophilus RAF gene, complete cds" ) ) { + System.out.println( entry3.getSequenceName() ); + return false; + } + if ( !entry3.getTaxonomyIdentifier().equals( "6326" ) ) { + System.out.println( entry3.getTaxonomyIdentifier() ); + return false; + } + if ( !entry3.getSequenceSymbol().equals( "RAF" ) ) { + System.out.println( entry3.getSequenceSymbol() ); + return false; + } + if ( !ForesterUtil.isEmpty( entry3.getGeneName() ) ) { + return false; + } + if ( entry3.getCrossReferences().size() != 8 ) { + return false; + } + // + // + final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" ); + if ( !entry4.getAccession().equals( "AAA36557" ) ) { + return false; + } + // + final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); + if ( !entry5.getAccession().equals( "HM043801" ) ) { return false; } } diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index ce72e2f..f3ae59c 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -25,8 +25,9 @@ package org.forester.ws.seqdb; -import java.util.ArrayList; import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -67,11 +68,14 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { final Pattern chromosome_PATTERN = Pattern.compile( "\\s+/chromosome=\"(\\w+)\"" ); final Pattern map_PATTERN = Pattern.compile( "\\s+/map=\"([\\w+\\.])\"" ); final Pattern gene_PATTERN = Pattern.compile( "\\s+/gene=\"(.+)\"" ); - final Pattern mim_xref_PATTERN = Pattern.compile( "\\s+/db_xref=\"MIM:(\\d+)\"" ); - final Pattern taxon_xref_PATTERN = Pattern.compile( "\\s+/db_xref=\"taxon:(\\d+)\"" ); - final Pattern interpro_PATTERN = Pattern.compile( "\\s+/db_xref=\"InterPro:(IP\\d+)\"" ); + final Pattern mim_PATTERN = Pattern.compile( "\\s+/db_xref=\"MIM:(\\d+)\"" ); + final Pattern taxon_PATTERN = Pattern.compile( "\\s+/db_xref=\"taxon:(\\d+)\"" ); + final Pattern interpro_PATTERN = Pattern.compile( "\\s+/db_xref=\"InterPro:([A-Z0-9]+)\"" ); final Pattern uniprot_PATTERN = Pattern.compile( "\\s+/db_xref=\"UniProtKB/TrEMBL:(\\w+)\"" ); + final Pattern hgnc_PATTERN = Pattern.compile( "\\s+/db_xref=\"HGNC:(\\d+)\"" ); + final Pattern geneid_PATTERN = Pattern.compile( "\\s+/db_xref=\"GeneID:(\\d+)\"" ); final Pattern ec_PATTERN = Pattern.compile( "\\s+/EC_number=\"([\\.\\-\\d]+)\"" ); + final Pattern product_PATTERN = Pattern.compile( "\\s+/product=\"(\\w{1,10})\"" ); final EbiDbEntry e = new EbiDbEntry(); final StringBuilder def = new StringBuilder(); boolean in_definition = false; @@ -79,6 +83,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { boolean in_source = false; boolean in_gene = false; boolean in_cds = false; + boolean in_mrna = false; boolean in_protein = false; for( final String line : lines ) { if ( line.startsWith( "ACCESSION " ) ) { @@ -159,6 +164,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { in_source = false; in_gene = false; in_cds = false; + in_mrna = false; in_protein = false; // in_def = false; } @@ -169,42 +175,77 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { in_source = true; in_gene = false; in_cds = false; + in_mrna = false; in_protein = false; } if ( in_features && ( line.startsWith( " gene " ) || line.startsWith( "FT gene " ) ) ) { in_source = false; in_gene = true; in_cds = false; + in_mrna = false; in_protein = false; } if ( in_features && ( line.startsWith( " CDS " ) || line.startsWith( "FT CDS " ) ) ) { in_source = false; in_gene = false; in_cds = true; + in_mrna = false; in_protein = false; } if ( in_features && ( line.startsWith( " Protein " ) || line.startsWith( "FT Protein " ) ) ) { in_source = false; in_gene = false; in_cds = false; + in_mrna = false; in_protein = true; } + if ( in_features && ( line.startsWith( " mRNA " ) || line.startsWith( "FT mRNA " ) ) ) { + in_source = false; + in_gene = false; + in_cds = false; + in_mrna = true; + in_protein = false; + } if ( in_source ) { - final Matcher m = taxon_xref_PATTERN.matcher( line ); + final Matcher m = taxon_PATTERN.matcher( line ); if ( m.find() ) { e.setTaxId( m.group( 1 ) ); } } - if ( in_protein || in_cds ) { - final Matcher m = ec_PATTERN.matcher( line ); - if ( m.find() ) { - e.addAnnotation( new Annotation( "EC", m.group( 1 ) ) ); + if ( in_cds || in_gene ) { + final Matcher hgnc = hgnc_PATTERN.matcher( line ); + if ( hgnc.find() ) { + e.addCrossReference( new Accession( hgnc.group( 1 ), "hgnc" ) ); + } + final Matcher geneid = geneid_PATTERN.matcher( line ); + if ( geneid.find() ) { + e.addCrossReference( new Accession( geneid.group( 1 ), "geneid" ) ); } } - if ( in_protein || in_cds || in_gene ) { - final Matcher m = gene_PATTERN.matcher( line ); - if ( m.find() ) { - e.setGeneName( m.group( 1 ) ); + if ( in_protein || in_cds || in_gene || in_mrna ) { + final Matcher ec = ec_PATTERN.matcher( line ); + if ( ec.find() ) { + e.addAnnotation( new Annotation( "EC", ec.group( 1 ) ) ); + } + final Matcher gene = gene_PATTERN.matcher( line ); + if ( gene.find() ) { + e.setGeneName( gene.group( 1 ) ); + } + final Matcher uniprot = uniprot_PATTERN.matcher( line ); + if ( uniprot.find() ) { + e.addCrossReference( new Accession( uniprot.group( 1 ), "uniprot" ) ); + } + final Matcher interpro = interpro_PATTERN.matcher( line ); + if ( interpro.find() ) { + e.addCrossReference( new Accession( interpro.group( 1 ), "interpro" ) ); + } + final Matcher mim = mim_PATTERN.matcher( line ); + if ( mim.find() ) { + e.addCrossReference( new Accession( mim.group( 1 ), "mim" ) ); + } + final Matcher product = product_PATTERN.matcher( line ); + if ( product.find() ) { + e.setSequenceSymbol( product.group( 1 ) ); } } } @@ -222,15 +263,15 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { } // FIXME actually this is NCBI entry //http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/emb/AAR37336/ - private String _pa; - private String _de; - private String _os; - private String _tax_id; - private String _symbol; - private String _provider; - private List _cross_references; - private List _annotations; - private String _gene_name; + private String _pa; + private String _de; + private String _os; + private String _tax_id; + private String _symbol; + private String _provider; + private SortedSet _cross_references; + private SortedSet _annotations; + private String _gene_name; // TODO PUBMED 15798186 //TODO (FEATURES) @@ -488,7 +529,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { private void addCrossReference( final Accession accession ) { if ( _cross_references == null ) { - _cross_references = new ArrayList(); + _cross_references = new TreeSet(); } System.out.println( "XREF ADDED: " + accession ); _cross_references.add( accession ); @@ -505,7 +546,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { } @Override - public List getCrossReferences() { + public SortedSet getCrossReferences() { return _cross_references; } @@ -515,7 +556,7 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { } @Override - public List getGoTerms() { + public SortedSet getGoTerms() { return null; } @@ -534,6 +575,10 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { return _symbol; } + private void setSequenceSymbol( String symbol ) { + _symbol = symbol; + } + @Override public String getTaxonomyIdentifier() { return _tax_id; @@ -586,13 +631,13 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { } @Override - public List getAnnotations() { + public SortedSet getAnnotations() { return _annotations; } private void addAnnotation( final Annotation annotation ) { if ( _annotations == null ) { - _annotations = new ArrayList(); + _annotations = new TreeSet(); } _annotations.add( annotation ); } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java index b060d0d..38c3437 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java @@ -25,7 +25,7 @@ package org.forester.ws.seqdb; -import java.util.List; +import java.util.SortedSet; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; @@ -37,9 +37,9 @@ public interface SequenceDatabaseEntry { public String getGeneName(); - public List getGoTerms(); + public SortedSet getGoTerms(); - public List getAnnotations(); + public SortedSet getAnnotations(); public String getProvider(); @@ -53,5 +53,5 @@ public interface SequenceDatabaseEntry { public boolean isEmpty(); - public List getCrossReferences(); + public SortedSet getCrossReferences(); } \ No newline at end of file diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index 209d284..c65fb2a 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -54,35 +54,15 @@ import org.forester.util.SequenceAccessionTools; public final class SequenceDbWsTools { - public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; - public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id="; public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; + public final static int DEFAULT_LINES_TO_RETURN = 4000; //public final static String EMBL_DBS_EMBL = "embl"; public final static String EMBL_DBS_REFSEQ_N = "refseqn"; public final static String EMBL_DBS_REFSEQ_P = "refseqp"; + public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id="; + public final static String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id="; private final static boolean DEBUG = true; private final static String URL_ENC = "UTF-8"; - public final static int DEFAULT_LINES_TO_RETURN = 4000; - - final static String extractFrom( final String target, final String a ) { - final int i_a = target.indexOf( a ); - return target.substring( i_a + a.length() ).trim(); - } - - final static String extractFromTo( final String target, final String a, final String b ) { - final int i_a = target.indexOf( a ); - final int i_b = target.indexOf( b ); - if ( ( i_a < 0 ) || ( i_b < i_a ) ) { - throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a - + "\" and \"" + b + "\"" ); - } - return target.substring( i_a + a.length(), i_b ).trim(); - } - - final static String extractTo( final String target, final String b ) { - final int i_b = target.indexOf( b ); - return target.substring( 0, i_b ).trim(); - } public static List getTaxonomiesFromCommonNameStrict( final String cn, final int max_taxonomies_return ) @@ -142,22 +122,40 @@ public final class SequenceDbWsTools { return null; } + public static SequenceDatabaseEntry obtainEmblEntry( final Accession acc ) throws IOException { + return obtainEmblEntry( acc, DEFAULT_LINES_TO_RETURN ); + } + public static SequenceDatabaseEntry obtainEmblEntry( final Accession acc, final int max_lines_to_return ) throws IOException { final List lines = queryEmblDb( acc, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines ); } - public static SequenceDatabaseEntry obtainEmblEntry( final Accession acc ) throws IOException { - return obtainEmblEntry( acc, DEFAULT_LINES_TO_RETURN ); + public static SequenceDatabaseEntry obtainEntry( final String acc_str ) throws IOException { + if ( ForesterUtil.isEmpty( acc_str ) ) { + throw new IllegalArgumentException( "cannot not extract sequence db accessor from null or empty string" ); + } + final Accession acc = SequenceAccessionTools.parseAccessorFromString( acc_str ); + if ( acc == null ) { + throw new IllegalArgumentException( "could not extract acceptable sequence db accessor from \"" + acc_str + + "\"" ); + } + if ( acc.getSource().equals( Source.REFSEQ.toString() ) || acc.getSource().equals( Source.EMBL.toString() ) + || acc.getSource().equals( Source.NCBI.toString() ) ) { + return obtainEmblEntry( acc, DEFAULT_LINES_TO_RETURN ); + } + else if ( acc.getSource().equals( Source.UNIPROT.toString() ) ) { + return obtainUniProtEntry( acc.getValue(), DEFAULT_LINES_TO_RETURN ); + } + else { + throw new IllegalArgumentException( "don't know how to handle request for source \"" + acc.getSource() + + "\"" ); + } } - public final static Accession obtainSeqAccession( final PhylogenyNode node ) { - Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node ); - if ( !isAccessionAcceptable( acc ) ) { - acc = SequenceAccessionTools.obtainAccessorFromDataFields( node ); - } - return acc; + public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession acc ) throws IOException { + return obtainRefSeqEntryFromEmbl( acc, DEFAULT_LINES_TO_RETURN ); } public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession acc, final int max_lines_to_return ) @@ -166,8 +164,12 @@ public final class SequenceDbWsTools { return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines ); } - public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession acc ) throws IOException { - return obtainRefSeqEntryFromEmbl( acc, DEFAULT_LINES_TO_RETURN ); + public final static Accession obtainSeqAccession( final PhylogenyNode node ) { + Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node ); + if ( !isAccessionAcceptable( acc ) ) { + acc = SequenceAccessionTools.obtainAccessorFromDataFields( node ); + } + return acc; } public final static void obtainSeqInformation( final boolean allow_to_set_taxonomic_data, @@ -191,10 +193,6 @@ public final class SequenceDbWsTools { obtainSeqInformation( allow_to_set_taxonomic_data, DEFAULT_LINES_TO_RETURN, not_found, node ); } - public final static void obtainSeqInformation( final PhylogenyNode node ) throws IOException { - obtainSeqInformation( true, DEFAULT_LINES_TO_RETURN, new TreeSet(), node ); - } - public final static SortedSet obtainSeqInformation( final Phylogeny phy, final boolean ext_nodes_only, final boolean allow_to_set_taxonomic_data, @@ -209,16 +207,20 @@ public final class SequenceDbWsTools { return not_found; } - public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return ) - throws IOException { - final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); - return UniProtEntry.createInstanceFromPlainText( lines ); + public final static void obtainSeqInformation( final PhylogenyNode node ) throws IOException { + obtainSeqInformation( true, DEFAULT_LINES_TO_RETURN, new TreeSet(), node ); } public static SequenceDatabaseEntry obtainUniProtEntry( final String query ) throws IOException { return obtainUniProtEntry( query, DEFAULT_LINES_TO_RETURN ); } + public static SequenceDatabaseEntry obtainUniProtEntry( final String query, final int max_lines_to_return ) + throws IOException { + final List lines = queryUniprot( "uniprot/" + query + ".txt", max_lines_to_return ); + return UniProtEntry.createInstanceFromPlainText( lines ); + } + public static List queryDb( final String query, int max_lines_to_return, final String base_url ) throws IOException { if ( ForesterUtil.isEmpty( query ) ) { @@ -255,13 +257,6 @@ public final class SequenceDbWsTools { return result; } - public static List queryEmblDbForRefSeqEntry( final Accession id, final int max_lines_to_return ) - throws IOException { - final StringBuilder url_sb = new StringBuilder(); - url_sb.append( EMBL_REFSEQ ); - return queryDb( id.getValue(), max_lines_to_return, url_sb.toString() ); - } - public static List queryEmblDb( final Accession id, final int max_lines_to_return ) throws IOException { final StringBuilder url_sb = new StringBuilder(); // url_sb.append( BASE_EMBL_DB_URL ); @@ -286,10 +281,37 @@ public final class SequenceDbWsTools { return queryDb( id.getValue(), max_lines_to_return, url_sb.toString() ); } + public static List queryEmblDbForRefSeqEntry( final Accession id, final int max_lines_to_return ) + throws IOException { + final StringBuilder url_sb = new StringBuilder(); + url_sb.append( EMBL_REFSEQ ); + return queryDb( id.getValue(), max_lines_to_return, url_sb.toString() ); + } + public static List queryUniprot( final String query, final int max_lines_to_return ) throws IOException { return queryDb( query, max_lines_to_return, BASE_UNIPROT_URL ); } + final static String extractFrom( final String target, final String a ) { + final int i_a = target.indexOf( a ); + return target.substring( i_a + a.length() ).trim(); + } + + final static String extractFromTo( final String target, final String a, final String b ) { + final int i_a = target.indexOf( a ); + final int i_b = target.indexOf( b ); + if ( ( i_a < 0 ) || ( i_b < i_a ) ) { + throw new IllegalArgumentException( "attempt to extract from \"" + target + "\" between \"" + a + + "\" and \"" + b + "\"" ); + } + return target.substring( i_a + a.length(), i_b ).trim(); + } + + final static String extractTo( final String target, final String b ) { + final int i_b = target.indexOf( b ); + return target.substring( 0, i_b ).trim(); + } + private static void addDataFromDbToNode( final boolean allow_to_set_taxonomic_data, final int lines_to_return, final SortedSet not_found, diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 1345523..029c4b1 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -25,8 +25,9 @@ package org.forester.ws.seqdb; -import java.util.ArrayList; import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -51,9 +52,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public final static Pattern PharmGKB_PATTERN = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" ); public final static Pattern Reactome_PATTERN = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" ); private String _ac; - private ArrayList _cross_references; + private SortedSet _cross_references; private String _gene_name; - private List _go_terms; + private SortedSet _go_terms; private String _name; private String _os_scientific_name; private String _symbol; @@ -73,7 +74,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } @Override - public List getCrossReferences() { + public SortedSet getCrossReferences() { return _cross_references; } @@ -83,7 +84,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } @Override - public List getGoTerms() { + public SortedSet getGoTerms() { return _go_terms; } @@ -123,14 +124,14 @@ public final class UniProtEntry implements SequenceDatabaseEntry { private void addCrossReference( final Accession accession ) { if ( _cross_references == null ) { - _cross_references = new ArrayList(); + _cross_references = new TreeSet(); } _cross_references.add( accession ); } private void addGoTerm( final BasicGoTerm g ) { if ( _go_terms == null ) { - _go_terms = new ArrayList(); + _go_terms = new TreeSet(); } _go_terms.add( g ); } @@ -290,7 +291,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } @Override - public List getAnnotations() { + public SortedSet getAnnotations() { return null; } }