From: tcofoegbu Date: Wed, 18 May 2016 15:50:26 +0000 (+0100) Subject: merge X-Git-Tag: Release_2_10_0~220 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=ca15c06c7c05af6f540fdd2a6f9a8fb76f8ef76d;hp=1861ba00ddfe2274a788ad03f82c44b6523ad8b7;p=jalview.git merge --- diff --git a/help/html/features/uniprotqueryfields.html b/help/html/features/uniprotqueryfields.html new file mode 100644 index 0000000..376180a --- /dev/null +++ b/help/html/features/uniprotqueryfields.html @@ -0,0 +1,599 @@ + + + +UniProtKB query fields + + + +

+ UniProtKB query fields +

+

Supported query fields for searching specific data in UniProtKB (see also query syntax).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldExampleDescription
accession + accession:P62988 + + Lists all entries with the primary or secondary + accession number P62988. +
active + active:no + + Lists all obsolete entries. +
annotation + + annotation:(type:non-positional) +
+ annotation:(type:positional) +
+ annotation:(type:mod_res "Pyrrolidone carboxylic acid" evidence:experimental) +
+
+ Lists all entries with: +
    +
  • any general annotation (comments [CC])
  • +
  • any sequence annotation (features [FT])
  • +
  • at least one amino acid modified with a Pyrrolidone carboxylic acid group
  • +
+
author + + author:ashburner + + + Lists all entries with at least one reference co-authored by Michael Ashburner. +
cdantigen + + cdantigen:CD233 + + + Lists all entries whose cluster of differentiation number is CD233. +
citation + + citation:("intracellular structural proteins") +
+ citation:(author:ashburner journal:nature) + citation:9169874 +
+
+ Lists all entries with a literature citation: +
    +
  • containing the phrase "intracellular structural proteins" in either title or abstract
  • +
  • co-authored by Michael Ashburner and published in Nature
  • +
  • with the PubMed identifier 9169874
  • +
+
cluster + + cluster:UniRef90_A5YMT3 + + + Lists all entries in the UniRef 90% identity cluster whose + representative sequence is UniProtKB entry A5YMT3. +
count + + annotation:(type:transmem count:5)
+ annotation:(type:transmem count:[5 TO *])
+ annotation:(type:cofactor count:[3 TO *]) +
+
Lists all entries with: +
    +
  • exactly 5 transmembrane regions
  • +
  • 5 or more transmembrane regions
  • +
  • 3 or more Cofactor comments
  • +
+
created + + created:[20121001 TO *]
+ reviewed:yes AND created:[current TO *] +
+
+ Lists all entries created since October 1st 2012.
+ Lists all new UniProtKB/Swiss-Prot entries in the last release. +
database + + database:(type:pfam) +
+ database:(type:pdb 1aut) +
+
+ Lists all entries with: +
    +
  • a cross-reference to the Pfam database
  • +
  • a cross-reference to the PDB database entry 1aut
  • +
+ +
domain + + domain:VWFA + + + Lists all entries with a Von Willebrand factor type A domain described + in the 'Family and Domains' section. +
ec + + ec:3.2.1.23 + + + Lists all beta-galactosidases. +
evidence + + annotation:(type:signal evidence:ECO_0000269)
+ (type:mod_res phosphoserine evidence:ECO_0000269)
+ annotation:(type:function AND evidence:ECO_0000255) +
+
Lists all entries with: +
    +
  • a signal sequence whose positions have been experimentally proven
  • +
  • experimentally proven phosphoserine sites
  • +
  • a function manually asserted according to rules
  • +
+
family + + family:serpin + + + Lists all entries belonging to the Serpin family of proteins. +
fragment + + fragment:yes + + + Lists all entries with an incomplete sequence. +
gene + + gene:HSPC233 + + + Lists all entries for proteins encoded by gene HSPC233. +
go + + go:cytoskeleton +
+ go:0015629 +
+
+ Lists all entries associated with: +
    +
  • a GO term containing the word "cytoskeleton"
  • +
  • the GO term Actin cytoskeleton and any subclasses
  • +
+
host + + host:mouse +
+ host:10090 +
+ host:40674 +
+
+ Lists all entries for viruses infecting: +
    +
  • organisms with a name containing the word "mouse"
  • +
  • Mus musculus (Mouse)
  • +
  • all mammals (all taxa classified under the taxonomy node for Mammalia)
  • +
+
id + id:P00750 + + Returns the entry with the primary + accession number P00750. +
inn + + inn:Anakinra + + + Lists all entries whose "International Nonproprietary Name" is Anakinra. +
interactor + + interactor:P00520 + + + Lists all entries describing interactions with the protein described by + entry P00520. +
keyword + + keyword:toxin + + + Lists all entries associated with the keyword Toxin. +
length + + length:[500 TO 700] + + + Lists all entries describing sequences of length between 500 and 700 residues. +
lineage + + This field is a synonym for the field taxonomy. +
mass + + mass:[500000 TO *] + + + Lists all entries describing sequences with a mass of at least 500,000 Da. +
method + + method:maldi +
+ method:xray +
+
+ Lists all entries for proteins identified by: matrix-assisted laser + desorption/ionization (MALDI), crystallography (X-Ray). The + method field searches names of physico-chemical + identification methods in the 'Biophysicochemical properties' subsection of the 'Function' section, the 'Publications' and + 'Cross-references' sections. +
mnemonic + + mnemonic:ATP6_HUMAN + + + Lists all entries with entry name (ID) ATP6_HUMAN. Searches also + obsolete entry names. +
modified + + modified:[20120101 TO 20120301]
+ reviewed:yes AND modified:[current TO *] +
+
+ Lists all entries that were last modified between January and March 2012.
+ Lists all UniProtKB/Swiss-Prot entries modified in the last release. +
name + + name:"prion protein" + + + Lists all entries for prion proteins. +
organelle + + organelle:Mitochondrion + + + Lists all entries for proteins encoded by a gene of the mitochondrial + chromosome. +
organism + + organism:"Ovis aries" +
+ organism:9940 +
+ organism:sheep +
+
+
+ Lists all entries for proteins expressed in sheep (first 2 examples) and + organisms whose name contains the term "sheep". +
plasmid + + plasmid:ColE1 + + + Lists all entries for proteins encoded by a gene of plasmid ColE1. +
proteome + + proteome:UP000005640 + + + Lists all entries from the human proteome. +
proteomecomponent + + proteomecomponent:"chromosome 1" and organism:9606 + + + Lists all entries from the human chromosome 1. +
replaces + + replaces:P02023 + + + Lists all entries that were created from a merge with entry P02023. +
reviewed + + reviewed:yes + + + Lists all UniProtKB/Swiss-Prot entries. +
scope + + scope:mutagenesis + + + Lists all entries containing a reference that was used to gather + information about mutagenesis. +
sequence + + sequence:P05067-9 + + + Lists all entries containing a link to isoform 9 of the sequence + described in entry P05067. Allows searching by specific sequence + identifier. +
sequence_modified + + sequence_modified:[20120101 TO 20120301]
+ reviewed:yes AND sequence_modified:[current TO *] +
+
+ Lists all entries whose sequences were last modified between January and March 2012.
+ Lists all UniProtKB/Swiss-Prot entries whose sequences were modified in the last release. +
source + + source:intact + + + Lists all entries containing a GO term whose annotation source is the + IntAct database. +
strain + + strain:wistar + + + Lists all entries containing a reference relevant to strain wistar. +
taxonomy + + taxonomy:40674 + + + Lists all entries for proteins expressed in Mammals. This field is used to retrieve + entries for all organisms classified below a given taxonomic node (taxonomy classification). +
tissue + + tissue:liver + + + Lists all entries containing a reference describing the protein sequence + obtained from a clone isolated from liver. +
web + + web:wikipedia + + + Lists all entries for proteins that are described in Wikipedia. +
+ + + \ No newline at end of file diff --git a/help/html/features/uniprotseqfetcher.png b/help/html/features/uniprotseqfetcher.png new file mode 100644 index 0000000..a592e8e Binary files /dev/null and b/help/html/features/uniprotseqfetcher.png differ diff --git a/help/html/features/uniprotsequencefetcher.html b/help/html/features/uniprotsequencefetcher.html new file mode 100644 index 0000000..55b4d71 --- /dev/null +++ b/help/html/features/uniprotsequencefetcher.html @@ -0,0 +1,161 @@ + + + +The Uniprot Free Text Search Interface + + + + The Uniprot Free Text Search Interface +

+ Jalview provides a specialised interface that allows fast and + efficient discovery and retrieval of data from the Uniprot database. + It allows + interactive querying of Uniprot metadata with free text and structured + queries, so sequences can be located without prior knowledge of + their database accessions, or via manual cross-referencing + from Uniprot or other bioinformatics websites. +

+

+ To open the UniProt Sequence Fetcher, select UniProt as the database from + any Sequence Fetcher dialog (opened via + "File → Fetch Sequences"). +

+

+ Uniprot sequence fetcher (introduced in Jalview 2.9.1) +

+ +

+ Searching the Uniprot Database +

+

+ To search Uniprot, begin typing in the text box. The results of your + query are shown in the search results tab, which queries Uniprot after a 1.5 second delay every time + you type in the search text box. You can sort results according to + the displayed columns, and select entries with the mouse or + keyboard. Once you have selected one or more entries, hit the OK + button to retrieve and visualise the sequences in the Jalview alignment interface. +

+ +

+ Result pagination +

+ The query results returned from the Uniprot server are paginated for performance optimisation. + The buttons labelled ' << ' and ' >> ' can be used to navigate to the previous or next result page respectively. + The page range is shown on the title bar of the Free Text Search interface. Jalview's pagination implementation supports multiple selection of entries across multiple pages. + + 

+ Customising The Uniprot Sequence Fetcher +

+

+ To change the displayed meta-data in the search result, click the + 'Customise Displayed Options' tab, and select the fields you'd like + to display or remove. +

+

+ The Uniprot Free Text Search Interface was introduced in + Jalview 2.9.1. +

+ + \ No newline at end of file diff --git a/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java b/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java index a4f82cf..dfdf715 100644 --- a/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java +++ b/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java @@ -63,22 +63,20 @@ public class UniProtFTSRestClient extends FTSRestClient : uniportRestRequest.getResponseSize(); int offSet = uniportRestRequest.getOffSet(); + String query; + if (isAdvancedQuery(uniportRestRequest.getSearchTerm())) + { + query = uniportRestRequest.getSearchTerm(); + } + else + { + query = uniportRestRequest.getFieldToSearchBy().equalsIgnoreCase( + "Search All") ? uniportRestRequest.getSearchTerm() + + " or mnemonic:" + uniportRestRequest.getSearchTerm() + : uniportRestRequest.getFieldToSearchBy() + ":" + + uniportRestRequest.getSearchTerm(); + } - String query = uniportRestRequest.getFieldToSearchBy() - .equalsIgnoreCase("Search All") ? uniportRestRequest - .getSearchTerm() - + " or mnemonic:" - + uniportRestRequest - .getSearchTerm() : uniportRestRequest.getFieldToSearchBy() - + ":" + uniportRestRequest.getSearchTerm(); - - // + (uniportRestRequest.isAllowUnpublishedEntries() ? "" - // : " AND status:REL"); - // System.out.println(">>>>> Query : " + query); - // System.out.println(">>>>> Columns : " + wantedFields); - // System.out.println(">>>>> Response size: " + responseSize - // + " offset : " - // + offSet); WebResource webResource = null; webResource = client.resource(UNIPROT_SEARCH_ENDPOINT) .queryParam("format", "tab") @@ -131,6 +129,17 @@ public class UniProtFTSRestClient extends FTSRestClient } } + public boolean isAdvancedQuery(String query) + { + if (query.contains(" AND ") || query.contains(" OR ") + || query.contains(" NOT ") || query.contains(" ! 
") + || query.contains(" || ") || query.contains(" && ") + || query.contains(":") || query.contains("-")) + { + return true; + } + return false; + } public FTSRestResponse parseUniprotResponse( String uniProtTabDelimittedResponseString,