From 735e346c658597baaa9194dfb160b294a1198eae Mon Sep 17 00:00:00 2001 From: Ben Soares Date: Thu, 21 Jul 2022 20:58:06 +0100 Subject: [PATCH] JAL-4036 New configuration, target and pagination process for 2022-07 new Uniprot API --- resources/fts/uniprot_data_columns-2022.txt | 356 ++++++++++++++++++++ .../fts/service/uniprot/UniProtFTSRestClient.java | 231 +++++++++++-- .../fts/service/uniprot/UniprotFTSPanel.java | 90 ++++- 3 files changed, 645 insertions(+), 32 deletions(-) create mode 100644 resources/fts/uniprot_data_columns-2022.txt diff --git a/resources/fts/uniprot_data_columns-2022.txt b/resources/fts/uniprot_data_columns-2022.txt new file mode 100644 index 0000000..7e51cd8 --- /dev/null +++ b/resources/fts/uniprot_data_columns-2022.txt @@ -0,0 +1,356 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ + +uniprot_data_columns +# +_group.id +_group.name +_group.sort_order +g3;Names & Taxonomy;1 +g6;Miscellaneous;6 +g7;Sequences;7 +g8;Function;8 +g9;Interaction;9 +g10;Expression;10 +g11;Gene Ontology (GO);11 +g12;Pathology & Biotech;12 +g13;Subcellular location;13 +g14;PTM / Processing;14 +g15;Structure;15 +g16;Publications;16 +g17;Date of;17 +g18;Family & Domains;18 +g19;2D Gel Databases;1000 +g20;3D Structure Databases;1000 +g21;Chemistry Databases;1000 +g22;Enzyme And Pathway Databases;1000 +g23;Family And Domain Databases;1000 +g24;Gene Expression Databases;1000 +g25;Genetic Variation Databases;1000 +g26;Genome Annotation Databases;1000 +g27;Miscellaneous Databases;1000 +g28;Organism-Specific Databases;1000 +g29;Phylogenomic Databases;1000 +g30;Protein Family/Group Databases;1000 +g31;Protein-Protein Interaction Databases;1000 +g32;Proteomic Databases;1000 +g33;Protocols And Materials Databases;1000 +g34;Ptm Databases;1000 +g35;Sequence Databases;1000 +# +_data_column.primary_key;id +_data_column.default_response_page_size;500 +# +_data_column.name +_data_column.code|_data_column.alt_code (optional: used for specifying search code when different from original code) +_data_column.group_id +_data_column.data_type +_data_column.min_col_width +_data_column.max_col_width +_data_column.preferred_col_width +_data_column.is_shown_by_default +_data_column.is_searchable +ALL;Search All;String;g7;50;1000;95;false;true +Entry;accession;String;g3;80;150;85;true;true +Entry name;id|accession_id;String;g3;100;150;105;true;true +Gene names;gene_names|gene;String;g3;100;1000;145;true;true +Gene names (primary);gene_primary;String;g3;50;1000;95;false;false +Gene names (synonym);gene_synonym;String;g3;50;1000;95;false;false +Gene names (ordered locus);gene_oln;String;g3;50;1000;95;false;false +Gene names (ORF);gene_orf;String;g3;50;1000;95;false;false +Organism;organism_name;String;g3;100;1000;200;true;true +Organism ID;organism_id;int;g3;60;100;80;false;true +Protein 
names;protein_name;String;g3;300;1500;500;true;true +Proteomes;xref_proteomes;String;g3;50;1000;95;false;false +Taxonomic lineage;lineage|taxonomy_name;String;g3;50;400;95;false;false +Taxonomic lineage (IDs);lineage_ids|taxonomy_id;String;g3;50;400;95;false;false +Virus hosts;virus_hosts|virus_host;String;g3;50;1000;95;false;true +Annotation;annotation_score;String;g6;50;1000;95;false;false +Caution;cc_caution;String;g6;50;1000;95;false;false +Comment Count;comment_count;String;g6;50;1000;95;false;false +Features;feature_count;String;g6;50;1000;95;false;false +Keyword ID;keywordid;String;g6;50;1000;95;false;false +Keywords;keyword;String;g6;50;1000;95;false;true +Miscellaneous [CC];cc_miscellaneous;String;g6;50;1000;95;false;false +Protein existence;protein_existence|existence;String;g6;50;1000;95;false;true +Reviewed;reviewed;String;g6;50;100;95;true;true +Tools;tools;String;g6;50;1000;95;false;false +UniParc;uniparc_id;String;g6;50;1000;95;false;false +Alternative products;cc_alternative_products;String;g7;50;1000;95;false;false +Alternative sequence;ft_var_seq;String;g7;50;1000;95;false;false +Erroneous gene model prediction;error_gmodel_pred;String;g7;50;1000;95;false;false +Fragment;fragment;String;g7;50;1000;95;false;false +Gene encoded by;organelle;String;g7;50;1000;95;false;false +Length;length;int|T|0;g7;50;100;65;true;true +Mass;mass;int|T|0;g7;50;100;80;false;true +Mass spectrometry;cc_mass_spectrometry;String;g7;50;1000;95;false;false +Natural variant;ft_variant;String;g7;50;1000;95;false;false +Non-adjacent residues;ft_non_cons;String;g7;50;1000;95;false;false +Non-standard residue;ft_non_std;String;g7;50;1000;95;false;false +Non-terminal residue;ft_non_ter;String;g7;50;1000;95;false;false +Polymorphism;cc_polymorphism;String;g7;50;1000;95;false;false +RNA editing;cc_rna_editing;String;g7;50;1000;95;false;false +Sequence;sequence;String;g7;50;1000;95;false;false +Sequence caution;cc_sequence_caution;String;g7;50;1000;95;false;false +Sequence 
conflict;ft_conflict;String;g7;50;1000;95;false;false +Sequence uncertainty;ft_unsure;String;g7;50;1000;95;false;false +Sequence version;sequence_version;String;g7;50;1000;95;false;false +Absorption;absorption;String;g8;50;1000;95;false;false +Active site;ft_act_site;String;g8;50;1000;95;false;false +Activity regulation;cc_activity_regulation;String;g8;50;1000;95;false;false +Binding site;ft_binding;String;g8;50;1000;95;false;false +Calcium binding;ft_ca_bind;String;g8;50;1000;95;false;false +Catalytic activity;cc_catalytic_activity;String;g8;50;1000;95;false;false +Cofactor;cc_cofactor;String;g8;50;1000;95;false;false +DNA binding;ft_dna_bind;String;g8;50;1000;95;false;false +EC number;ec;String;g8;50;1000;95;false;true +Function [CC];cc_function;String;g8;50;1000;95;false;false +Kinetics;kinetics;String;g8;50;1000;95;false;false +Metal binding;ft_metal;String;g8;50;1000;95;false;false +Nucleotide binding;ft_np_bind;String;g8;50;1000;95;false;false +Pathway;cc_pathway;String;g8;50;1000;95;false;false +pH dependence;ph_dependence;String;g8;50;1000;95;false;false +Redox potential;redox_potential;String;g8;50;1000;95;false;false +Rhea ID;rhea;String;g8;50;1000;95;false;false +Site;ft_site;String;g8;50;1000;95;false;false +Temperature dependence;temp_dependence;String;g8;50;1000;95;false;false +Interacts with;cc_interaction;String;g9;50;1000;95;false;false +Subunit structure[CC];cc_subunit;String;g9;50;1000;95;false;false +Developmental stage;cc_developmental_stage;String;g10;50;1000;95;false;false +Induction;cc_induction;String;g10;50;1000;95;false;false +Tissue specificity;cc_tissue_specificity;String;g10;50;1000;95;false;false +Gene ontology (biological process);go_p;String;g11;50;1000;95;false;false +Gene ontology (cellular component);go_c;String;g11;50;1000;95;false;false +Gene ontology (GO);go;String;g11;50;1000;95;false;true +Gene ontology (molecular function);go_f;String;g11;50;1000;95;false;false +Gene ontology IDs;go_id;String;g11;50;1000;95;false;false 
+Allergenic properties;cc_allergen;String;g12;50;1000;95;false;false +Biotechnological use;cc_biotechnology;String;g12;50;1000;95;false;false +Disruption phenotype;cc_disruption_phenotype;String;g12;50;1000;95;false;false +Involvement in disease;cc_disease;String;g12;50;1000;95;false;false +Mutagenesis;ft_mutagen;String;g12;50;1000;95;false;false +Pharmaceutical use;cc_pharmaceutical;String;g12;50;1000;95;false;false +Toxic dose;cc_toxic_dose;String;g12;50;1000;95;false;false +Intramembrane;ft_intramem;String;g13;50;1000;95;false;false +Subcellular location[CC];cc_subcellular_location;String;g13;50;1000;95;false;false +Topological domain;ft_topo_dom;String;g13;50;1000;95;false;false +Transmembrane;ft_transmem;String;g13;50;1000;95;false;false +Chain;ft_chain;String;g14;50;1000;95;false;false +Cross-link;ft_crosslnk;String;g14;50;1000;95;false;false +Disulfide bond;ft_disulfid;String;g14;50;1000;95;false;false +Glycosylation;ft_carbohyd;String;g14;50;1000;95;false;false +Initiator methionine;ft_init_met;String;g14;50;1000;95;false;false +Lipidation;ft_lipid;String;g14;50;1000;95;false;false +Modified residue;ft_mod_res;String;g14;50;1000;95;false;false +Peptide;ft_peptide;String;g14;50;1000;95;false;false +Post-translational modification;cc_ptm;String;g14;50;1000;95;false;false +Propeptide;ft_propep;String;g14;50;1000;95;false;false +Signal peptide;ft_signal;String;g14;50;1000;95;false;false +Transit peptide;ft_transit;String;g14;50;1000;95;false;false +3D;structure_3d;String;g15;50;1000;95;false;false +Beta strand;ft_strand;String;g15;50;1000;95;false;false +Helix;ft_helix;String;g15;50;1000;95;false;false +Turn;ft_turn;String;g15;50;1000;95;false;false +PubMed ID;lit_pubmed_id;String;g16;50;1000;95;false;false +Date of creation;date_created;String;g17;80;150;100;false;true +Date of last modification;date_modified;String;g17;80;150;100;false;true +Date of last sequence modification;date_sequence_modified;String;g17;80;150;100;false;true +Entry 
version;version;int;g17;80;100;80;false;false +Coiled coil;ft_coiled;String;g18;50;1000;95;false;false +Compositional bias;ft_compbias;String;g18;50;1000;95;false;false +Domain[CC];cc_domain;String;g18;80;1000;95;false;false +Domain[FT];ft_domain;String;g18;50;1000;95;false;false +Motif;ft_motif;String;g18;50;1000;95;false;false +Protein families;protein_families|family;String;g18;50;1000;95;false;true +Region;ft_region;String;g18;50;1000;95;false;false +Repeat;ft_repeat;String;g18;50;1000;95;false;false +Zinc finger;ft_zn_fing;String;g18;50;1000;95;false;false +COMPLUYEAST-2DPAGE;xref_compluyeast-2dpage;String;g19;50;1000;95;false;false +DOSAC-COBS-2DPAGE;xref_dosac-cobs-2dpage;String;g19;50;1000;95;false;false +OGP;xref_ogp;String;g19;50;1000;95;false;false +REPRODUCTION-2DPAGE;xref_reproduction-2dpage;String;g19;50;1000;95;false;false +SWISS-2DPAGE;xref_swiss-2dpage;String;g19;50;1000;95;false;false +UCD-2DPAGE;xref_ucd-2dpage;String;g19;50;1000;95;false;false +World-2DPAGE;xref_world-2dpage;String;g19;50;1000;95;false;false +AlphaFoldDB;xref_alphafolddb;String;g20;50;1000;95;false;false +BMRB;xref_bmrb;String;g20;50;1000;95;false;false +PCDDB;xref_pcddb;String;g20;50;1000;95;false;false +PDB;xref_pdb;String;g20;50;1000;95;false;false +PDBsum;xref_pdbsum;String;g20;50;1000;95;false;false +SASBDB;xref_sasbdb;String;g20;50;1000;95;false;false +SMR;xref_smr;String;g20;50;1000;95;false;false +BindingDB;xref_bindingdb;String;g21;50;1000;95;false;false +ChEMBL;xref_chembl;String;g21;50;1000;95;false;false +DrugBank;xref_drugbank;String;g21;50;1000;95;false;false +DrugCentral;xref_drugcentral;String;g21;50;1000;95;false;false +GuidetoPHARMACOLOGY;xref_guidetopharmacology;String;g21;50;1000;95;false;false +SwissLipids;xref_swisslipids;String;g21;50;1000;95;false;false +BRENDA;xref_brenda;String;g22;50;1000;95;false;false +BioCyc;xref_biocyc;String;g22;50;1000;95;false;false +PathwayCommons;xref_pathwaycommons;String;g22;50;1000;95;false;false 
+PlantReactome;xref_plantreactome;String;g22;50;1000;95;false;false +Reactome;xref_reactome;String;g22;50;1000;95;false;false +SABIO-RK;xref_sabio-rk;String;g22;50;1000;95;false;false +SIGNOR;xref_signor;String;g22;50;1000;95;false;false +SignaLink;xref_signalink;String;g22;50;1000;95;false;false +UniPathway;xref_unipathway;String;g22;50;1000;95;false;false +CDD;xref_cdd;String;g23;50;1000;95;false;false +DisProt;xref_disprot;String;g23;50;1000;95;false;false +Gene3D;xref_gene3d;String;g23;50;1000;95;false;false +HAMAP;xref_hamap;String;g23;50;1000;95;false;false +IDEAL;xref_ideal;String;g23;50;1000;95;false;false +InterPro;xref_interpro;String;g23;50;1000;95;false;false +PANTHER;xref_panther;String;g23;50;1000;95;false;false +PIRSF;xref_pirsf;String;g23;50;1000;95;false;false +PRINTS;xref_prints;String;g23;50;1000;95;false;false +PROSITE;xref_prosite;String;g23;50;1000;95;false;false +Pfam;xref_pfam;String;g23;50;1000;95;false;false +ProDom;xref_prodom;String;g23;50;1000;95;false;false +SFLD;xref_sfld;String;g23;50;1000;95;false;false +SMART;xref_smart;String;g23;50;1000;95;false;false +SUPFAM;xref_supfam;String;g23;50;1000;95;false;false +TIGRFAMs;xref_tigrfams;String;g23;50;1000;95;false;false +Bgee;xref_bgee;String;g24;50;1000;95;false;false +CleanEx;xref_cleanex;String;g24;50;1000;95;false;false +CollecTF;xref_collectf;String;g24;50;1000;95;false;false +ExpressionAtlas;xref_expressionatlas;String;g24;50;1000;95;false;false +Genevisible;xref_genevisible;String;g24;50;1000;95;false;false +BioMuta;xref_biomuta;String;g25;50;1000;95;false;false +DMDM;xref_dmdm;String;g25;50;1000;95;false;false +dbSNP;xref_dbsnp;String;g25;50;1000;95;false;false +Ensembl;xref_ensembl;String;g26;50;1000;95;false;false +EnsemblBacteria;xref_ensemblbacteria;String;g26;50;1000;95;false;false +EnsemblFungi;xref_ensemblfungi;String;g26;50;1000;95;false;false +EnsemblMetazoa;xref_ensemblmetazoa;String;g26;50;1000;95;false;false 
+EnsemblPlants;xref_ensemblplants;String;g26;50;1000;95;false;false +EnsemblProtists;xref_ensemblprotists;String;g26;50;1000;95;false;false +GeneID;xref_geneid;String;g26;50;1000;95;false;false +Gramene;xref_gramene;String;g26;50;1000;95;false;false +KEGG;xref_kegg;String;g26;50;1000;95;false;false +MANE-Select;xref_mane-select;String;g26;50;1000;95;false;false +PATRIC;xref_patric;String;g26;50;1000;95;false;false +UCSC;xref_ucsc;String;g26;50;1000;95;false;false +VectorBase;xref_vectorbase;String;g26;50;1000;95;false;false +WBParaSite;xref_wbparasite;String;g26;50;1000;95;false;false +WBParaSiteTranscriptProtein;xref_wbparasitetranscriptprotein;String;g26;50;1000;95;false;false +BioGRID-ORCS;xref_biogrid-orcs;String;g27;50;1000;95;false;false +ChiTaRS;xref_chitars;String;g27;50;1000;95;false;false +EvolutionaryTrace;xref_evolutionarytrace;String;g27;50;1000;95;false;false +GeneWiki;xref_genewiki;String;g27;50;1000;95;false;false +GenomeRNAi;xref_genomernai;String;g27;50;1000;95;false;false +PHI-base;xref_phi-base;String;g27;50;1000;95;false;false +PRO;xref_pro;String;g27;50;1000;95;false;false +Pharos;xref_pharos;String;g27;50;1000;95;false;false +RNAct;xref_rnact;String;g27;50;1000;95;false;false +ArachnoServer;xref_arachnoserver;String;g28;50;1000;95;false;false +Araport;xref_araport;String;g28;50;1000;95;false;false +CGD;xref_cgd;String;g28;50;1000;95;false;false +CTD;xref_ctd;String;g28;50;1000;95;false;false +ConoServer;xref_conoserver;String;g28;50;1000;95;false;false +DisGeNET;xref_disgenet;String;g28;50;1000;95;false;false +EchoBASE;xref_echobase;String;g28;50;1000;95;false;false +FlyBase;xref_flybase;String;g28;50;1000;95;false;false +GeneCards;xref_genecards;String;g28;50;1000;95;false;false +GeneReviews;xref_genereviews;String;g28;50;1000;95;false;false +HGNC;xref_hgnc;String;g28;50;1000;95;false;false +HPA;xref_hpa;String;g28;50;1000;95;false;false +LegioList;xref_legiolist;String;g28;50;1000;95;false;false 
+Leproma;xref_leproma;String;g28;50;1000;95;false;false +MGI;xref_mgi;String;g28;50;1000;95;false;false +MIM;xref_mim;String;g28;50;1000;95;false;false +MaizeGDB;xref_maizegdb;String;g28;50;1000;95;false;false +MalaCards;xref_malacards;String;g28;50;1000;95;false;false +NIAGADS;xref_niagads;String;g28;50;1000;95;false;false +OpenTargets;xref_opentargets;String;g28;50;1000;95;false;false +Orphanet;xref_orphanet;String;g28;50;1000;95;false;false +PharmGKB;xref_pharmgkb;String;g28;50;1000;95;false;false +PomBase;xref_pombase;String;g28;50;1000;95;false;false +PseudoCAP;xref_pseudocap;String;g28;50;1000;95;false;false +RGD;xref_rgd;String;g28;50;1000;95;false;false +SGD;xref_sgd;String;g28;50;1000;95;false;false +TAIR;xref_tair;String;g28;50;1000;95;false;false +TubercuList;xref_tuberculist;String;g28;50;1000;95;false;false +VEuPathDB;xref_veupathdb;String;g28;50;1000;95;false;false +VGNC;xref_vgnc;String;g28;50;1000;95;false;false +WormBase;xref_wormbase;String;g28;50;1000;95;false;false +Xenbase;xref_xenbase;String;g28;50;1000;95;false;false +ZFIN;xref_zfin;String;g28;50;1000;95;false;false +dictyBase;xref_dictybase;String;g28;50;1000;95;false;false +euHCVdb;xref_euhcvdb;String;g28;50;1000;95;false;false +neXtProt;xref_nextprot;String;g28;50;1000;95;false;false +GeneTree;xref_genetree;String;g29;50;1000;95;false;false +HOGENOM;xref_hogenom;String;g29;50;1000;95;false;false +InParanoid;xref_inparanoid;String;g29;50;1000;95;false;false +KO;xref_ko;String;g29;50;1000;95;false;false +OMA;xref_oma;String;g29;50;1000;95;false;false +OrthoDB;xref_orthodb;String;g29;50;1000;95;false;false +PhylomeDB;xref_phylomedb;String;g29;50;1000;95;false;false +TreeFam;xref_treefam;String;g29;50;1000;95;false;false +eggNOG;xref_eggnog;String;g29;50;1000;95;false;false +Allergome;xref_allergome;String;g30;50;1000;95;false;false +CAZy;xref_cazy;String;g30;50;1000;95;false;false +CLAE;xref_clae;String;g30;50;1000;95;false;false +ESTHER;xref_esther;String;g30;50;1000;95;false;false 
+IMGT_GENE-DB;xref_imgt_gene-db;String;g30;50;1000;95;false;false +MEROPS;xref_merops;String;g30;50;1000;95;false;false +MoonDB;xref_moondb;String;g30;50;1000;95;false;false +MoonProt;xref_moonprot;String;g30;50;1000;95;false;false +PeroxiBase;xref_peroxibase;String;g30;50;1000;95;false;false +REBASE;xref_rebase;String;g30;50;1000;95;false;false +TCDB;xref_tcdb;String;g30;50;1000;95;false;false +UniLectin;xref_unilectin;String;g30;50;1000;95;false;false +BioGRID;xref_biogrid;String;g31;50;1000;95;false;false +CORUM;xref_corum;String;g31;50;1000;95;false;false +ComplexPortal;xref_complexportal;String;g31;50;1000;95;false;false +DIP;xref_dip;String;g31;50;1000;95;false;false +ELM;xref_elm;String;g31;50;1000;95;false;false +IntAct;xref_intact;String;g31;50;1000;95;false;false +MINT;xref_mint;String;g31;50;1000;95;false;false +STRING;xref_string;String;g31;50;1000;95;false;false +CPTAC;xref_cptac;String;g32;50;1000;95;false;false +EPD;xref_epd;String;g32;50;1000;95;false;false +MassIVE;xref_massive;String;g32;50;1000;95;false;false +MaxQB;xref_maxqb;String;g32;50;1000;95;false;false +PRIDE;xref_pride;String;g32;50;1000;95;false;false +PaxDb;xref_paxdb;String;g32;50;1000;95;false;false +PeptideAtlas;xref_peptideatlas;String;g32;50;1000;95;false;false +ProMEX;xref_promex;String;g32;50;1000;95;false;false +ProteomicsDB;xref_proteomicsdb;String;g32;50;1000;95;false;false +TopDownProteomics;xref_topdownproteomics;String;g32;50;1000;95;false;false +jPOST;xref_jpost;String;g32;50;1000;95;false;false +ABCD;xref_abcd;String;g33;50;1000;95;false;false +Antibodypedia;xref_antibodypedia;String;g33;50;1000;95;false;false +CPTC;xref_cptc;String;g33;50;1000;95;false;false +DNASU;xref_dnasu;String;g33;50;1000;95;false;false +CarbonylDB;xref_carbonyldb;String;g34;50;1000;95;false;false +DEPOD;xref_depod;String;g34;50;1000;95;false;false +GlyConnect;xref_glyconnect;String;g34;50;1000;95;false;false +GlyGen;xref_glygen;String;g34;50;1000;95;false;false 
+MetOSite;xref_metosite;String;g34;50;1000;95;false;false +PhosphoSitePlus;xref_phosphositeplus;String;g34;50;1000;95;false;false +SwissPalm;xref_swisspalm;String;g34;50;1000;95;false;false +UniCarbKB;xref_unicarbkb;String;g34;50;1000;95;false;false +iPTMnet;xref_iptmnet;String;g34;50;1000;95;false;false +CCDS;xref_ccds;String;g35;50;1000;95;false;false +EMBL;xref_embl;String;g35;50;1000;95;false;false +PIR;xref_pir;String;g35;50;1000;95;false;false +RefSeq;xref_refseq;String;g35;50;1000;95;false;false +# diff --git a/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java b/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java index 2606b62..05ccba7 100644 --- a/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java +++ b/src/jalview/fts/service/uniprot/UniProtFTSRestClient.java @@ -19,8 +19,37 @@ * The Jalview Authors are detailed in the 'AUTHORS' file. */ +/* + * 2022-07-20 bsoares + * See https://issues.jalview.org/browse/JAL-4036 + * The new Uniprot API is not dissimilar to the old one, but has some important changes. + * Some group names have changed slightly, some old groups have gone and there are quite a few new groups. + * + * Most changes are mappings of old column ids to new field ids. There are a handful of old + * columns not mapped to new fields, and new fields without an old column. + * [aside: not all possible columns were listed in the resources/fts/uniprot_data_columns.txt file. + * These were presumably additions after the file was created] + * For existing/mapped fields, the same preferences found in the resource file have been migrated to + * the new file with the new field name, id and group. + * + * The new mapped groups and fields are stored and read from resources/fts/uniprot_data_columns-2022.txt. + * + * There is now no "sort" query string parameter. 
+ * + * See https://www.uniprot.org/help/api_queries + * + * SIGNIFICANT CHANGE: Pagination is no longer performed using a record offset, but with a "cursor" + * query string parameter that is not really a cursor. The value is an opaque string that is passed (or + * rather a whole URL is passed) in the "Link" header of the HTTP response of the previous page. + * Where such a link is passed it is put into the cursors ArrayList. + * There are @Overridden methods in UniprotFTSPanel. + */ + package jalview.fts.service.uniprot; +import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.net.URL; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -37,31 +66,37 @@ import jalview.bin.Cache; import jalview.bin.Console; import jalview.fts.api.FTSData; import jalview.fts.api.FTSDataColumnI; -import jalview.fts.api.FTSRestClientI; import jalview.fts.core.FTSRestClient; import jalview.fts.core.FTSRestRequest; import jalview.fts.core.FTSRestResponse; +import jalview.util.ChannelProperties; import jalview.util.MessageManager; import jalview.util.Platform; public class UniProtFTSRestClient extends FTSRestClient { - private static final String DEFAULT_UNIPROT_DOMAIN = "https://legacy.uniprot.org"; + private static final String DEFAULT_UNIPROT_DOMAIN = "https://rest.uniprot.org"; + + private static final String USER_AGENT = ChannelProperties + .getProperty("app_name", "Jalview") + " " + + Cache.getDefault("VERSION", "Unknown") + " " + + MethodHandles.lookup().lookupClass() + " help@jalview.org"; static { Platform.addJ2SDirectDatabaseCall(DEFAULT_UNIPROT_DOMAIN); } - private static FTSRestClientI instance = null; + private static UniProtFTSRestClient instance = null; public final String uniprotSearchEndpoint; public UniProtFTSRestClient() { super(); + this.clearCursors(); uniprotSearchEndpoint = Cache.getDefault("UNIPROT_DOMAIN", - DEFAULT_UNIPROT_DOMAIN) + "/uniprot/"; + DEFAULT_UNIPROT_DOMAIN) + "/uniprotkb/search"; } 
@SuppressWarnings("unchecked") @@ -69,6 +104,12 @@ public class UniProtFTSRestClient extends FTSRestClient public FTSRestResponse executeRequest(FTSRestRequest uniprotRestRequest) throws Exception { + return executeRequest(uniprotRestRequest, null); + } + + public FTSRestResponse executeRequest(FTSRestRequest uniprotRestRequest, + String cursor) throws Exception + { try { String wantedFields = getDataColumnsFieldsAsCommaDelimitedString( @@ -85,11 +126,10 @@ public class UniProtFTSRestClient extends FTSRestClient } else { - query = uniprotRestRequest.getFieldToSearchBy() - .equalsIgnoreCase("Search All") - ? uniprotRestRequest.getSearchTerm() - + " or mnemonic:" - + uniprotRestRequest.getSearchTerm() + query = uniprotRestRequest.getFieldToSearchBy().equalsIgnoreCase( + "Search All") ? uniprotRestRequest.getSearchTerm() + // + " or mnemonic:" + // + uniprotRestRequest.getSearchTerm() : uniprotRestRequest.getFieldToSearchBy() + ":" + uniprotRestRequest.getSearchTerm(); } @@ -119,18 +159,62 @@ public class UniProtFTSRestClient extends FTSRestClient WebResource webResource = null; webResource = client.resource(uniprotSearchEndpoint) - .queryParam("format", "tab") - .queryParam("columns", wantedFields) - .queryParam("limit", String.valueOf(responseSize)) - .queryParam("offset", String.valueOf(offSet)) - .queryParam("sort", "score").queryParam("query", query); - if (Console.isDebugEnabled()) + .queryParam("format", "tsv") + .queryParam("fields", wantedFields) + .queryParam("size", String.valueOf(responseSize)) + /* 2022 new api has no "sort" + * .queryParam("sort", "score") + */ + .queryParam("query", query); + if (offSet != 0 && cursor != null && cursor.length() > 0) + // 2022 new api does not do pagination with an offset, it requires a + // "cursor" parameter with a key (given for the next page). 
+ // (see https://www.uniprot.org/help/pagination) { - Console.debug("Uniprot FTS Request: " + webResource.toString()); + webResource = webResource.queryParam("cursor", cursor); } + Console.debug( + "Uniprot FTS Request: " + webResource.getURI().toString()); // Execute the REST request - ClientResponse clientResponse = webResource - .accept(MediaType.TEXT_PLAIN).get(clientResponseClass); + WebResource.Builder wrBuilder = webResource + .accept(MediaType.TEXT_PLAIN); + if (!Platform.isJS()) + /** + * Java only + * + * @j2sIgnore + */ + { + wrBuilder.header("User-Agent", USER_AGENT); + } + ClientResponse clientResponse = wrBuilder.get(clientResponseClass); + + if (!Platform.isJS()) + /** + * Java only + * + * @j2sIgnore + */ + { + if (clientResponse.getHeaders().containsKey("Link")) + { + // extract the URL from the 'Link: <URL>; rel="next"' header + String linkHeader = clientResponse.getHeaders().get("Link") + .get(0); + if (linkHeader.indexOf("<") > -1) + { + String temp = linkHeader.substring(linkHeader.indexOf("<") + 1); + if (temp.indexOf(">") > -1) + { + String nextUrl = temp.substring(0, temp.indexOf(">")); + // then get the cursor value from the query string parameters + String nextCursor = getQueryParam("cursor", nextUrl); + setCursor(cursorPage + 1, nextCursor); + } + } + } + } + String uniProtTabDelimittedResponseString = clientResponse .getEntity(String.class); // Make redundant objects eligible for garbage collection to conserve @@ -144,15 +228,26 @@ public class UniProtFTSRestClient extends FTSRestClient throw new Exception(errorMessage); } - int xTotalResults = Platform.isJS() ? 
1 - : Integer.valueOf(clientResponse.getHeaders() - .get("X-Total-Results").get(0)); + // new Uniprot API is not including a "X-Total-Results" header when there + // are 0 results + List resultsHeaders = clientResponse.getHeaders() + .get("X-Total-Results"); + int xTotalResults = 0; + if (Platform.isJS()) + { + xTotalResults = 1; + } + else if (resultsHeaders != null && resultsHeaders.size() >= 1) + { + xTotalResults = Integer.valueOf(resultsHeaders.get(0)); + } clientResponse = null; client = null; return parseUniprotResponse(uniProtTabDelimittedResponseString, uniprotRestRequest, xTotalResults); } catch (Exception e) { + Console.debug("Exception caught from response", e); String exceptionMsg = e.getMessage(); if (exceptionMsg.contains("SocketException")) { @@ -352,7 +447,7 @@ public class UniProtFTSRestClient extends FTSRestClient }; } - public static FTSRestClientI getInstance() + public static UniProtFTSRestClient getInstance() { if (instance == null) { @@ -364,7 +459,95 @@ public class UniProtFTSRestClient extends FTSRestClient @Override public String getColumnDataConfigFileName() { - return "/fts/uniprot_data_columns.txt"; + return "/fts/uniprot_data_columns-2022.txt"; + } + + /* 2022-07-20 bsoares + * used for the new API "cursor" pagination. 
See https://www.uniprot.org/help/pagination + */ + private ArrayList cursors; + + private int cursorPage = 0; + + protected int getCursorPage() + { + return cursorPage; + } + + protected void setCursorPage(int i) + { + cursorPage = i; + } + + protected void setPrevCursorPage() + { + if (cursorPage > 0) + cursorPage--; + } + + protected void setNextCursorPage() + { + cursorPage++; + } + + protected void clearCursors() + { + cursors = new ArrayList(10); } -} + protected String getCursor(int i) + { + return cursors.get(i); + } + + protected String getNextCursor() + { + if (cursors.size() < cursorPage + 2) + return null; + return cursors.get(cursorPage + 1); + } + + protected String getPrevCursor() + { + if (cursorPage == 0) + return null; + return cursors.get(cursorPage - 1); + } + + protected void setCursor(int i, String c) + { + cursors.ensureCapacity(i + 1); + while (cursors.size() <= i) + { + cursors.add(null); + } + cursors.set(i, c); + Console.debug( + "Set UniprotFRSRestClient cursors[" + i + "] to '" + c + "'"); + // cursors.add(c); + } + + public static String getQueryParam(String param, String u) + { + if (param == null || u == null) + return null; + try + { + URL url = new URL(u); + String[] kevs = url.getQuery().split("&"); + for (int j = 0; j < kevs.length; j++) + { + String[] kev = kevs[j].split("=", 2); + if (param.equals(kev[0])) + { + return kev[1]; + } + } + } catch (MalformedURLException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + } +} \ No newline at end of file diff --git a/src/jalview/fts/service/uniprot/UniprotFTSPanel.java b/src/jalview/fts/service/uniprot/UniprotFTSPanel.java index 33ad8c4..0d9767c 100644 --- a/src/jalview/fts/service/uniprot/UniprotFTSPanel.java +++ b/src/jalview/fts/service/uniprot/UniprotFTSPanel.java @@ -21,6 +21,13 @@ package jalview.fts.service.uniprot; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import javax.help.HelpSetException; + +import 
jalview.bin.Console; import jalview.fts.api.FTSDataColumnI; import jalview.fts.api.FTSRestClientI; import jalview.fts.core.FTSRestRequest; @@ -31,12 +38,6 @@ import jalview.gui.Help.HelpId; import jalview.gui.SequenceFetcher; import jalview.util.MessageManager; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; - -import javax.help.HelpSetException; - @SuppressWarnings("serial") public class UniprotFTSPanel extends GFTSPanel { @@ -69,10 +70,19 @@ public class UniprotFTSPanel extends GFTSPanel @Override public void searchAction(boolean isFreshSearch) { + searchAction(null, isFreshSearch); + } + + public void searchAction(String cursor, boolean isFreshSearch) + { mainFrame.requestFocusInWindow(); if (isFreshSearch) { offSet = 0; + UniProtFTSRestClient c = UniProtFTSRestClient.getInstance(); + c.clearCursors(); + c.setCursorPage(0); + c.setCursor(0, ""); } new Thread() { @@ -97,12 +107,12 @@ public class UniprotFTSPanel extends GFTSPanel request.setSearchTerm(searchTerm); request.setOffSet(offSet); request.setWantedFields(wantedFields); - FTSRestClientI uniProtRestClient = UniProtFTSRestClient + UniProtFTSRestClient uniProtRestClient = UniProtFTSRestClient .getInstance(); FTSRestResponse resultList; try { - resultList = uniProtRestClient.executeRequest(request); + resultList = uniProtRestClient.executeRequest(request, cursor); } catch (Exception e) { setErrorMessage(e.getMessage()); @@ -268,4 +278,68 @@ public class UniprotFTSPanel extends GFTSPanel e1.printStackTrace(); } } + + /* + * 2022-07-20 bsoares + * The new Uniprot API has a strange pagination process described at + * https://www.uniprot.org/help/pagination + * When a successful request returns results, with more results past the size + * limit, the response sends a "Link" header with a URL containing a "cursor" + * parameter with an opaque string that refers to the next page of results. + * These are stored as nextCursor in the UniProtFTSRestClient along with the currCursor. 
+ * When navigation across pages occurs these should be swapped around. + */ + @Override + public void refreshPaginatorState() + { + UniProtFTSRestClient c = UniProtFTSRestClient.getInstance(); + setNextPageButtonEnabled(c.getNextCursor() != null); + setPrevPageButtonEnabled(c.getPrevCursor() != null); + } + + @Override + public void prevPageAction() + { + updatePaginatorCart(); + UniProtFTSRestClient c = UniProtFTSRestClient.getInstance(); + String prevCursor = c.getPrevCursor(); + if (prevCursor != null) + { + if (offSet >= pageLimit) + { + offSet -= pageLimit; + } + else + { + // not sure what's happening if we get here though it wouldn't surprise + // me + Console.warn( + "UniprotFTSPanel: prevCursor exists but offset < pageLimit. This probably shouldn't be happening."); + } + c.setPrevCursorPage(); + searchAction(prevCursor, false); + } + else + { + refreshPaginatorState(); + } + } + + @Override + public void nextPageAction() + { + UniProtFTSRestClient c = UniProtFTSRestClient.getInstance(); + String nextCursor = c.getNextCursor(); + if (nextCursor != null) + { + offSet += pageLimit; + c.setNextCursorPage(); + searchAction(nextCursor, false); + } + else + { + refreshPaginatorState(); + } + } + } -- 1.7.10.2