JAL-3224 JAL-3225 Fixed help image mangling, moved help to help/help and added this...
[jalview.git] / help / help / html / features / uniprotqueryfields.html
diff --git a/help/help/html/features/uniprotqueryfields.html b/help/help/html/features/uniprotqueryfields.html
new file mode 100644 (file)
index 0000000..182b206
--- /dev/null
@@ -0,0 +1,391 @@
+<html>
+<!--
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ -->
+<head>
+<title>UniProtKB query fields</title>
+</head>
+
+<body>
+  <p>
+    <strong>UniProtKB query fields</strong>
+  </p>
+  <p>
+    Supported query fields for searching specific data in UniProtKB (see
+    also <a href="uniprotsequencefetcher.html#text-search">query
+      syntax</a>).
+  </p>
+
+  <table border="1" width="95%">
+    <tr>
+      <th>Field</th>
+      <th>Example</th>
+      <th>Description</th>
+    </tr>
+    <tr>
+      <td>accession</td>
+      <td><code>accession:P62988</code></td>
+      <td>Lists all entries with the primary or secondary accession
+        number P62988.</td>
+    </tr>
+    <tr>
+      <td>active</td>
+      <td><code>active:no </code></td>
+      <td>Lists all obsolete entries.</td>
+    </tr>
+    <tr>
+      <td>annotation</td>
+      <td><code>
+          annotation:(type:non-positional) <br />
+          annotation:(type:positional) <br /> annotation:(type:mod_res
+          "Pyrrolidone carboxylic acid" evidence:experimental)
+        </code></td>
+      <td>Lists all entries with:
+        <ul>
+          <li>any general annotation (comments [CC])</li>
+          <li>any sequence annotation (features [FT])</li>
+          <li>at least one amino acid modified with a Pyrrolidone
+            carboxylic acid group</li>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>author</td>
+      <td><code> author:ashburner </code></td>
+      <td>Lists all entries with at least one reference co-authored
+        by Michael Ashburner.</td>
+    </tr>
+    <tr>
+      <td>cdantigen</td>
+      <td><code> cdantigen:CD233 </code></td>
+      <td>Lists all entries whose cluster of differentiation number
+        is CD233.</td>
+    </tr>
+    <tr>
+      <td>citation</td>
+      <td><code>
+          citation:("intracellular structural proteins") <br />
+          citation:(author:ashburner journal:nature) <br />
+          citation:9169874
+        </code></td>
+      <td>Lists all entries with a literature citation:
+        <ul>
+          <li>containing the phrase "intracellular structural
+            proteins" in either title or abstract</li>
+          <li>co-authored by Michael Ashburner and published in
+            Nature</li>
+          <li>with the PubMed identifier 9169874</li>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>cluster</td>
+      <td><code> cluster:UniRef90_A5YMT3 </code></td>
+      <td>Lists all entries in the UniRef 90% identity cluster
+        whose representative sequence is UniProtKB entry A5YMT3.</td>
+    </tr>
+    <tr>
+      <td>count</td>
+      <td><code>
+          annotation:(type:transmem count:5)<br />
+          annotation:(type:transmem count:[5 TO *])<br />
+          annotation:(type:cofactor count:[3 TO *])
+        </code></td>
+      <td>Lists all entries with:
+        <ul>
+          <li>exactly 5 transmembrane regions</li>
+          <li>5 or more transmembrane regions</li>
+          <li>3 or more Cofactor comments</li>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>created</td>
+      <td><code>
+          created:[20121001 TO *]<br /> reviewed:yes AND
+          created:[current TO *]
+        </code></td>
+      <td>Lists all entries created since October 1st 2012.<br />
+        Lists all new UniProtKB/Swiss-Prot entries in the last release.
+      </td>
+    </tr>
+    <tr>
+      <td>database</td>
+      <td><code>
+          database:(type:pfam) <br /> database:(type:pdb 1aut)
+        </code></td>
+      <td>Lists all entries with:
+        <ul>
+          <li>a cross-reference to the Pfam database</li>
+          <li>a cross-reference to the PDB database entry 1aut</li>
+        </ul>
+
+      </td>
+    </tr>
+    <tr>
+      <td>domain</td>
+      <td><code> domain:VWFA </code></td>
+      <td>Lists all entries with a Von Willebrand factor type A
+        domain described in the 'Family and Domains' section.</td>
+    </tr>
+    <tr>
+      <td>ec</td>
+      <td><code> ec:3.2.1.23 </code></td>
+      <td>Lists all beta-galactosidases.</td>
+    </tr>
+    <tr>
+      <td>evidence</td>
+      <td><code>
+          annotation:(type:signal evidence:ECO_0000269)<br />
+          annotation:(type:mod_res phosphoserine evidence:ECO_0000269)<br />
+          annotation:(type:function AND evidence:ECO_0000255)
+        </code></td>
+      <td>Lists all entries with:
+        <ul>
+          <li>a signal sequence whose positions have been
+            experimentally proven</li>
+          <li>experimentally proven phosphoserine sites</li>
+          <li>a function manually asserted according to rules</li>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>family</td>
+      <td><code> family:serpin </code></td>
+      <td>Lists all entries belonging to the Serpin family of
+        proteins.</td>
+    </tr>
+    <tr>
+      <td>fragment</td>
+      <td><code> fragment:yes </code></td>
+      <td>Lists all entries with an incomplete sequence.</td>
+    </tr>
+
+    <tr>
+      <td>gene</td>
+      <td><code> gene:HSPC233 </code></td>
+      <td>Lists all entries for proteins encoded by gene HSPC233.</td>
+    </tr>
+    <tr>
+      <td>go</td>
+      <td><code>
+          go:cytoskeleton <br /> go:0015629
+        </code></td>
+      <td>Lists all entries associated with:
+        <ul>
+          <li>a GO term containing the word "cytoskeleton"</li>
+          <li>the GO term Actin cytoskeleton and any subclasses</li>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>host</td>
+      <td><code>
+          host:mouse <br /> host:10090 <br /> host:40674
+        </code></td>
+      <td>Lists all entries for viruses infecting:
+        <ul>
+          <li>organisms with a name containing the word "mouse"</li>
+          <li>Mus musculus (Mouse)</li>
+          <li>all mammals (all taxa classified under the taxonomy
+            node for Mammalia)</li>
+        </ul>
+      </td>
+    </tr>
+    <tr>
+      <td>id</td>
+      <td><code>id:P00750</code></td>
+      <td>Returns the entry with the primary accession number
+        P00750.</td>
+    </tr>
+    <tr>
+      <td>inn</td>
+      <td><code> inn:Anakinra </code></td>
+      <td>Lists all entries whose "International Nonproprietary
+        Name" is Anakinra.</td>
+    </tr>
+    <tr>
+      <td>interactor</td>
+      <td><code> interactor:P00520 </code></td>
+      <td>Lists all entries describing interactions with the
+        protein described by entry P00520.</td>
+    </tr>
+    <tr>
+      <td>keyword</td>
+      <td><code> keyword:toxin </code></td>
+      <td>Lists all entries associated with the keyword Toxin.</td>
+    </tr>
+    <tr>
+      <td>length</td>
+      <td><code> length:[500 TO 700] </code></td>
+      <td>Lists all entries describing sequences of length between
+        500 and 700 residues.</td>
+    </tr>
+    <tr>
+      <td>lineage</td>
+      <td></td>
+      <td>This field is a synonym for the field <code>taxonomy</code>.
+      </td>
+    </tr>
+    <tr>
+      <td>mass</td>
+      <td><code> mass:[500000 TO *] </code></td>
+      <td>Lists all entries describing sequences with a mass of at
+        least 500,000 Da.</td>
+    </tr>
+    <tr>
+      <td>method</td>
+      <td><code>
+          method:maldi <br /> method:xray
+        </code></td>
+      <td>Lists all entries for proteins identified by:
+        matrix-assisted laser desorption/ionization (MALDI),
+        crystallography (X-Ray). The <code>method</code> field searches
+        names of physico-chemical identification methods in the
+        'Biophysicochemical properties' subsection of the 'Function'
+        section, the 'Publications' and 'Cross-references' sections.
+      </td>
+    </tr>
+    <tr>
+      <td>mnemonic</td>
+      <td><code> mnemonic:ATP6_HUMAN </code></td>
+      <td>Lists all entries with entry name (ID) ATP6_HUMAN.
+        Searches also obsolete entry names.</td>
+    </tr>
+    <tr>
+      <td>modified</td>
+      <td><code>
+          modified:[20120101 TO 20120301]<br /> reviewed:yes AND
+          modified:[current TO *]
+        </code></td>
+      <td>Lists all entries that were last modified between January
+        and March 2012.<br /> Lists all UniProtKB/Swiss-Prot entries
+        modified in the last release.
+      </td>
+    </tr>
+    <tr>
+      <td>name</td>
+      <td><code> name:"prion protein" </code></td>
+      <td>Lists all entries for prion proteins.</td>
+    </tr>
+    <tr>
+      <td>organelle</td>
+      <td><code> organelle:Mitochondrion </code></td>
+      <td>Lists all entries for proteins encoded by a gene of the
+        mitochondrial chromosome.</td>
+    </tr>
+    <tr>
+      <td>organism</td>
+      <td><code>
+          organism:"Ovis aries" <br /> organism:9940 <br />
+          organism:sheep <br />
+        </code></td>
+      <td>Lists all entries for proteins expressed in sheep (first
+        2 examples) and organisms whose name contains the term "sheep".
+      </td>
+    </tr>
+
+    <tr>
+      <td>plasmid</td>
+      <td><code> plasmid:ColE1 </code></td>
+      <td>Lists all entries for proteins encoded by a gene of
+        plasmid ColE1.</td>
+    </tr>
+    <tr>
+      <td>proteome</td>
+      <td><code> proteome:UP000005640 </code></td>
+      <td>Lists all entries from the human proteome.</td>
+    </tr>
+    <tr>
+      <td>proteomecomponent</td>
+      <td><code> proteomecomponent:"chromosome 1" AND
+          organism:9606 </code></td>
+      <td>Lists all entries from the human chromosome 1.</td>
+    </tr>
+    <tr>
+      <td>replaces</td>
+      <td><code> replaces:P02023 </code></td>
+      <td>Lists all entries that were created from a merge with
+        entry P02023.</td>
+    </tr>
+    <tr>
+      <td>reviewed</td>
+      <td><code> reviewed:yes </code></td>
+      <td>Lists all UniProtKB/Swiss-Prot entries.</td>
+    </tr>
+    <tr>
+      <td>scope</td>
+      <td><code> scope:mutagenesis </code></td>
+      <td>Lists all entries containing a reference that was used to
+        gather information about mutagenesis.</td>
+    </tr>
+    <tr>
+      <td>sequence</td>
+      <td><code> sequence:P05067-9 </code></td>
+      <td>Lists all entries containing a link to isoform 9 of the
+        sequence described in entry P05067. Allows searching by specific
+        sequence identifier.</td>
+    </tr>
+    <tr>
+      <td>sequence_modified</td>
+      <td><code>
+          sequence_modified:[20120101 TO 20120301]<br /> reviewed:yes
+          AND sequence_modified:[current TO *]
+        </code></td>
+      <td>Lists all entries whose sequences were last modified
+        between January and March 2012.<br /> Lists all
+        UniProtKB/Swiss-Prot entries whose sequences were modified in
+        the last release.
+      </td>
+    </tr>
+    <tr>
+      <td>source</td>
+      <td><code> source:intact </code></td>
+      <td>Lists all entries containing a GO term whose annotation
+        source is the IntAct database.</td>
+    </tr>
+    <tr>
+      <td>strain</td>
+      <td><code> strain:wistar </code></td>
+      <td>Lists all entries containing a reference relevant to
+        strain wistar.</td>
+    </tr>
+    <tr>
+      <td>taxonomy</td>
+      <td><code> taxonomy:40674 </code></td>
+      <td>Lists all entries for proteins expressed in Mammals. This
+        field is used to retrieve entries for all organisms classified
+        below a given taxonomic node (taxonomy classification).</td>
+    </tr>
+    <tr>
+      <td>tissue</td>
+      <td><code> tissue:liver </code></td>
+      <td>Lists all entries containing a reference describing the
+        protein sequence obtained from a clone isolated from liver.</td>
+    </tr>
+    <tr>
+      <td>web</td>
+      <td><code> web:wikipedia </code></td>
+      <td>Lists all entries for proteins that are described in
+        Wikipedia.</td>
+    </tr>
+  </table>
+
+</body>
+</html>
\ No newline at end of file