2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.bin.Cache;
24 import jalview.datamodel.DBRefEntry;
25 import jalview.datamodel.DBRefSource;
26 import jalview.datamodel.SequenceFeature;
27 import jalview.datamodel.SequenceI;
28 import jalview.gui.AlignFrame;
29 import jalview.gui.Desktop;
30 import jalview.gui.FeatureSettings;
31 import jalview.gui.JvOptionPane;
32 import jalview.util.DBRefUtils;
33 import jalview.util.MessageManager;
34 import jalview.util.UrlLink;
35 import jalview.ws.dbsources.das.api.DasSourceRegistryI;
36 import jalview.ws.dbsources.das.api.jalviewSourceI;
38 import java.util.ArrayList;
39 import java.util.Arrays;
40 import java.util.Enumeration;
41 import java.util.HashMap;
42 import java.util.HashSet;
43 import java.util.Iterator;
44 import java.util.List;
47 import java.util.StringTokenizer;
48 import java.util.Vector;
50 import org.biodas.jdas.client.FeaturesClient;
51 import org.biodas.jdas.client.adapters.features.DasGFFAdapter;
52 import org.biodas.jdas.client.adapters.features.DasGFFAdapter.GFFAdapter;
53 import org.biodas.jdas.client.threads.FeaturesClientMultipleSources;
54 import org.biodas.jdas.schema.features.ERRORSEGMENT;
55 import org.biodas.jdas.schema.features.FEATURE;
56 import org.biodas.jdas.schema.features.LINK;
57 import org.biodas.jdas.schema.features.SEGMENT;
58 import org.biodas.jdas.schema.features.TYPE;
59 import org.biodas.jdas.schema.features.UNKNOWNFEATURE;
60 import org.biodas.jdas.schema.features.UNKNOWNSEGMENT;
61 import org.biodas.jdas.schema.sources.COORDINATES;
69 public class DasSequenceFeatureFetcher
71 SequenceI[] sequences;
75 FeatureSettings fsettings;
77 StringBuffer sbuffer = new StringBuffer();
79 List<jalviewSourceI> selectedSources;
81 boolean cancelled = false;
83 private void debug(String mesg)
88 private void debug(String mesg, Exception e)
90 if (Cache.log != null)
92 Cache.log.debug(mesg, e);
96 System.err.println(mesg);
106 private DasSourceRegistryI sourceRegistry;
108 private boolean useJDASMultiThread = true;
/**
 * Creates a new DasSequenceFeatureFetcher with every optional behaviour
 * enabled: the arguments are forwarded to the six-argument constructor with
 * checkDbrefs, promptFetchDbrefs and useJDasMultiThread all set to true.
 *
 * @param sequences
 *          sequences handed on to the full constructor
 * @param fsettings
 *          feature settings dialog handed on to the full constructor (that
 *          constructor tests it for null)
 * @param selectedSources
 *          sources handed on to the full constructor
 */
public DasSequenceFeatureFetcher(SequenceI[] sequences,
        FeatureSettings fsettings, Vector selectedSources)
{
  // flags, in order: checkDbrefs, promptFetchDbrefs, useJDasMultiThread
  this(sequences, fsettings, selectedSources, true, true, true);
}
/**
 * Creates a new DasSequenceFeatureFetcher, forwarding every argument to the
 * six-argument constructor with useJDasMultiThread set to true.
 *
 * @param oursequences
 *          sequences handed on to the full constructor
 * @param fsettings
 *          feature settings dialog handed on to the full constructor
 * @param selectedSources2
 *          sources handed on to the full constructor
 * @param checkDbrefs
 *          forwarded unchanged
 * @param promptFetchDbrefs
 *          forwarded unchanged
 */
public DasSequenceFeatureFetcher(SequenceI[] oursequences,
        FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
        boolean checkDbrefs, boolean promptFetchDbrefs)
{
  // the trailing 'true' is useJDasMultiThread
  this(oursequences, fsettings, selectedSources2, checkDbrefs,
          promptFetchDbrefs, true);
}
132 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
133 FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
134 boolean checkDbrefs, boolean promptFetchDbrefs,
135 boolean useJDasMultiThread)
137 this.useJDASMultiThread = useJDasMultiThread;
138 this.selectedSources = new ArrayList<>();
139 // filter both sequences and sources to eliminate duplicates
140 for (jalviewSourceI src : selectedSources2)
142 if (!selectedSources.contains(src))
144 selectedSources.add(src);
148 Vector sqs = new Vector();
149 for (int i = 0; i < oursequences.length; i++)
151 if (!sqs.contains(oursequences[i]))
153 sqs.addElement(oursequences[i]);
156 sequences = new SequenceI[sqs.size()];
157 for (int i = 0; i < sequences.length; i++)
159 sequences[i] = (SequenceI) sqs.elementAt(i);
161 if (fsettings != null)
163 this.fsettings = fsettings;
164 this.af = fsettings.af;
165 af.setShowSeqFeatures(true);
167 int uniprotCount = 0;
168 for (jalviewSourceI source : selectedSources)
170 for (COORDINATES coords : source.getVersion().getCOORDINATES())
172 // TODO: match UniProt coord system canonically (?) - does
173 // UniProt==uniprot==UNIPROT ?
174 if (coords.getAuthority().toLowerCase().equals("uniprot"))
183 for (int i = 0; i < sequences.length; i++)
185 DBRefEntry[] dbref = sequences[i].getDBRefs();
188 for (int j = 0; j < dbref.length; j++)
190 if (dbref[j].getSource().equals(DBRefSource.UNIPROT))
199 if (checkDbrefs && refCount < sequences.length && uniprotCount > 0)
202 int reply = JvOptionPane.YES_OPTION;
203 if (promptFetchDbrefs)
205 reply = JvOptionPane.showInternalConfirmDialog(Desktop.desktop,
206 MessageManager.getString(
207 "info.you_want_jalview_to_find_uniprot_accessions"),
209 .getString("label.find_uniprot_accession_ids"),
210 JvOptionPane.YES_NO_OPTION, JvOptionPane.QUESTION_MESSAGE);
213 if (reply == JvOptionPane.YES_OPTION)
215 Thread thread = new Thread(new FetchDBRefs(),
216 "FetchDBReferenceManager");
231 private void _startFetching()
234 new Thread(new FetchSeqFeatures(), "FetchSeqFeatures").start();
237 class FetchSeqFeatures implements Runnable
243 setGuiFetchComplete();
247 class FetchDBRefs implements Runnable
253 boolean isNucleotide = af.getViewport().getAlignment().isNucleotide();
254 new DBRefFetcher(sequences, af, null, af.featureSettings,
255 isNucleotide).fetchDBRefs(true);
258 setGuiFetchComplete();
263 * Spawns Fetcher threads to add features to sequences in the dataset
269 startTime = System.currentTimeMillis();
272 af.setProgressBar(MessageManager.getString(
273 "status.fetching_das_sequence_features"), startTime);
275 if (sourceRegistry == null)
277 sourceRegistry = Cache.getDasSourceRegistry();
279 if (selectedSources == null || selectedSources.size() == 0)
283 jalviewSourceI[] sources = sourceRegistry.getSources()
284 .toArray(new jalviewSourceI[0]);
285 String active = Cache.getDefault("DAS_ACTIVE_SOURCE", "uniprot");
286 StringTokenizer st = new StringTokenizer(active, "\t");
287 selectedSources = new Vector();
289 while (st.hasMoreTokens())
291 token = st.nextToken();
292 for (int i = 0; i < sources.length; i++)
294 if (sources[i].getTitle().equals(token))
296 selectedSources.add(sources[i]);
301 } catch (Exception ex)
303 debug("Exception whilst setting default feature sources from registry and local preferences.",
308 if (selectedSources == null || selectedSources.size() == 0)
310 System.out.println("No DAS Sources active");
312 setGuiNoDassourceActive();
316 sourcesRemaining = selectedSources.size();
317 FeaturesClientMultipleSources fc = new FeaturesClientMultipleSources();
318 fc.setConnProps(sourceRegistry.getSessionHandler());
319 // Now sending requests one at a time to each server
320 ArrayList<jalviewSourceI> srcobj = new ArrayList<>();
321 ArrayList<String> src = new ArrayList<>();
322 List<List<String>> ids = new ArrayList<>();
323 List<List<DBRefEntry>> idobj = new ArrayList<>();
324 List<Map<String, SequenceI>> sqset = new ArrayList<>();
325 for (jalviewSourceI _sr : selectedSources)
328 Map<String, SequenceI> slist = new HashMap<>();
329 List<DBRefEntry> idob = new ArrayList<>();
330 List<String> qset = new ArrayList<>();
332 for (SequenceI seq : sequences)
334 Object[] idset = nextSequence(_sr, seq);
337 List<DBRefEntry> _idob = (List<DBRefEntry>) idset[0];
338 List<String> _qset = (List<String>) idset[1];
339 if (_idob.size() > 0)
341 // add sequence's ref for each id derived from it
342 // (space inefficient, but most unambiguous)
343 // could replace with hash with _qset values as keys.
344 Iterator<DBRefEntry> dbobj = _idob.iterator();
345 for (String q : _qset)
347 SequenceI osq = slist.get(q);
348 DBRefEntry dr = dbobj.next();
349 if (osq != null && osq != seq)
351 // skip - non-canonical query
366 src.add(_sr.getSourceURL());
372 Map<String, Map<List<String>, Exception>> errors = new HashMap<>();
373 Map<String, Map<List<String>, DasGFFAdapter>> results = new HashMap<>();
374 if (!useJDASMultiThread)
376 Iterator<String> sources = src.iterator();
377 // iterate over each query for each source and do each one individually
378 for (List<String> idl : ids)
380 String source = sources.next();
381 FeaturesClient featuresc = new FeaturesClient(
382 sourceRegistry.getSessionHandler()
383 .getConnectionPropertyProviderFor(source));
384 for (String id : idl)
386 List<String> qid = Arrays.asList(new String[] { id });
389 DasGFFAdapter dga = featuresc.fetchData(source, qid);
390 Map<List<String>, DasGFFAdapter> ers = results.get(source);
394 ers = new HashMap<>());
397 } catch (Exception ex)
399 Map<List<String>, Exception> ers = errors.get(source);
403 ers = new HashMap<>());
412 // pass them all at once
413 fc.fetchData(src, ids, false, results, errors);
415 while (!fc.isTerminated())
420 } catch (InterruptedException x)
426 Iterator<List<String>> idset = ids.iterator();
427 Iterator<List<DBRefEntry>> idobjset = idobj.iterator();
428 Iterator<Map<String, SequenceI>> seqset = sqset.iterator();
429 for (jalviewSourceI source : srcobj)
431 processResponse(seqset.next(), source, idset.next(), idobjset.next(),
432 results.get(source.getSourceURL()),
433 errors.get(source.getSourceURL()));
437 private void processResponse(Map<String, SequenceI> sequencemap,
438 jalviewSourceI jvsource, List<String> ids, List<DBRefEntry> idobj,
439 Map<List<String>, DasGFFAdapter> results,
440 Map<List<String>, Exception> errors)
442 Set<SequenceI> sequences = new HashSet<>();
443 String source = jvsource.getSourceURL();
445 DasGFFAdapter result = (results == null) ? null : results.get(ids);
446 Exception error = (errors == null) ? null : errors.get(ids);
449 debug("das source " + source + " could not be contacted. "
450 + (error == null ? "" : error.toString()));
455 GFFAdapter gff = result.getGFF();
456 List<SEGMENT> segments = gff.getSegments();
457 List<ERRORSEGMENT> errorsegs = gff.getErrorSegments();
458 List<UNKNOWNFEATURE> unkfeats = gff.getUnknownFeatures();
459 List<UNKNOWNSEGMENT> unksegs = gff.getUnknownSegments();
460 debug("das source " + source + " returned " + gff.getTotal()
461 + " responses. " + (errorsegs != null ? errorsegs.size() : 0)
462 + " were incorrect segment queries, "
463 + (unkfeats != null ? unkfeats.size() : 0)
464 + " were unknown features "
465 + (unksegs != null ? unksegs.size() : 0)
466 + " were unknown segments and "
467 + (segments != null ? segments.size() : 0)
468 + " were segment responses.");
469 Iterator<DBRefEntry> dbr = idobj.iterator();
470 if (segments != null)
472 for (SEGMENT seg : segments)
474 String id = seg.getId();
475 if (ids.indexOf(id) == -1)
477 id = id.toUpperCase();
479 DBRefEntry dbref = idobj.get(ids.indexOf(id));
480 SequenceI sequence = sequencemap.get(id);
481 boolean added = false;
482 sequences.add(sequence);
484 for (FEATURE feat : seg.getFEATURE())
486 // standard DAS feature-> jalview sequence feature transformation
487 SequenceFeature f = newSequenceFeature(feat,
488 jvsource.getTitle());
489 if (!parseSeqFeature(sequence, f, feat, jvsource))
491 if (dbref.getMap() != null && f.getBegin() > 0
494 debug("mapping from " + f.getBegin() + " - " + f.getEnd());
495 SequenceFeature vf[] = null;
499 vf = dbref.getMap().locateFeature(f);
500 } catch (Exception ex)
503 "Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org.");
504 Cache.log.warn("Mapping feature from " + f.getBegin()
505 + " to " + f.getEnd() + " in dbref "
506 + dbref.getAccessionId() + " in "
507 + dbref.getSource());
508 Cache.log.warn("using das Source " + source);
509 Cache.log.warn("Exception", ex);
514 for (int v = 0; v < vf.length; v++)
516 debug("mapping to " + v + ": " + vf[v].getBegin()
517 + " - " + vf[v].getEnd());
518 sequence.addSequenceFeature(vf[v]);
524 sequence.addSequenceFeature(f);
529 featuresAdded(sequences);
533 // System.out.println("No features found for " + seq.getName()
534 // + " from: " + e.getDasSource().getNickname());
539 private void setGuiNoDassourceActive()
545 MessageManager.getString("status.no_das_sources_active"),
548 if (getFeatSettings() != null)
550 fsettings.noDasSourceActive();
555 * Update our fsettings dialog reference if we didn't have one when we were
560 private FeatureSettings getFeatSettings()
562 if (fsettings == null)
566 fsettings = af.featureSettings;
576 af.setProgressBar(MessageManager.getString(
577 "status.das_feature_fetching_cancelled"), startTime);
582 int sourcesRemaining = 0;
584 private boolean running = false;
586 private void setGuiFetchComplete()
589 if (!cancelled && af != null)
591 // only update the progress bar if we've completed the fetch normally
592 af.setProgressBar(MessageManager.getString(
593 "status.das_feature_fetching_complete"), startTime);
596 if (af != null && af.featureSettings != null)
598 af.featureSettings.discoverAllFeatureData();
601 if (getFeatSettings() != null)
603 fsettings.complete();
607 void featuresAdded(Set<SequenceI> seqs)
611 // no gui to update with features.
614 af.getFeatureRenderer().featuresAdded();
616 int start = af.getViewport().getRanges().getStartSeq();
617 int end = af.getViewport().getRanges().getEndSeq();
619 for (index = start; index < end; index++)
621 for (SequenceI seq : seqs)
623 if (seq == af.getViewport().getAlignment().getSequenceAt(index)
624 .getDatasetSequence())
626 af.alignPanel.paintAlignment(true, true);
634 Object[] nextSequence(jalviewSourceI dasSource, SequenceI seq)
640 DBRefEntry[] uprefs = DBRefUtils.selectRefs(seq.getDBRefs(),
643 // jalview.datamodel.DBRefSource.PDB,
645 // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord
648 // TODO: minimal list of DAS queries to make by querying with untyped ID if
649 // distinct from any typed IDs
651 List<DBRefEntry> ids = new ArrayList<>();
652 List<String> qstring = new ArrayList<>();
653 boolean dasCoordSysFound = false;
657 // do any of these ids match the source's coordinate system ?
658 for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++)
661 for (COORDINATES csys : dasSource.getVersion().getCOORDINATES())
663 if (DBRefUtils.isDasCoordinateSystem(csys.getAuthority(),
666 debug("Launched fetcher for coordinate system "
667 + csys.getAuthority());
668 // Will have to pass any mapping information to the fetcher
669 // - the start/end for the DBRefEntry may not be the same as the
670 // sequence's start/end
673 seq.getName() + " " + (seq.getDatasetSequence() == null)
674 + " " + csys.getUri());
676 dasCoordSysFound = true; // break's out of the loop
678 qstring.add(uprefs[j].getAccessionId());
682 System.out.println("IGNORE " + csys.getAuthority());
688 if (!dasCoordSysFound)
691 // try and use the name as the sequence id
692 if (seq.getName().indexOf("|") > -1)
694 id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1);
695 if (id.trim().length() < 4)
697 // hack - we regard a significant ID as being at least 4
698 // non-whitespace characters
699 id = seq.getName().substring(0, seq.getName().lastIndexOf("|"));
700 if (id.indexOf("|") > -1)
702 id = id.substring(id.lastIndexOf("|") + 1);
712 DBRefEntry dbre = new DBRefEntry();
713 dbre.setAccessionId(id);
714 // Should try to call a general feature fetcher that
715 // queries many sources with name to discover applicable ID references
717 qstring.add(dbre.getAccessionId());
721 return new Object[] { ids, qstring };
725 * examine the given sequence feature to determine if it should actually be
726 * turned into sequence annotation or database cross references rather than a
727 * simple sequence feature.
730 * the sequence to annotate
732 * the jalview sequence feature generated from the DAS feature
734 * the sequence feature attributes
736 * the source that emitted the feature
737 * @return true if feature was consumed as another kind of annotation.
739 protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f,
740 FEATURE feature, jalviewSourceI source)
742 SequenceI mseq = seq;
743 while (seq.getDatasetSequence() != null)
745 seq = seq.getDatasetSequence();
747 if (f.getType() != null)
749 String type = f.getType();
750 if (type.equalsIgnoreCase("protein_name"))
752 // parse name onto the alignment sequence or the dataset sequence.
753 if (seq.getDescription() == null
754 || seq.getDescription().trim().length() == 0)
756 // could look at the note series to pick out the first long name, for
757 // the moment just use the whole description string
758 seq.setDescription(f.getDescription());
760 if (mseq.getDescription() == null
761 || mseq.getDescription().trim().length() == 0)
763 // could look at the note series to pick out the first long name, for
764 // the moment just use the whole description string
765 mseq.setDescription(f.getDescription());
769 // check if source has biosapiens or other sequence ontology label
770 if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF"))
772 // try to parse the accession out
774 DBRefEntry dbr = new DBRefEntry();
775 dbr.setVersion(source.getTitle());
776 StringTokenizer st = new StringTokenizer(f.getDescription(), ":");
777 if (st.hasMoreTokens())
779 dbr.setSource(st.nextToken());
781 if (st.hasMoreTokens())
783 dbr.setAccessionId(st.nextToken());
787 if (f.links != null && f.links.size() > 0)
789 // feature is also appended to enable links to be seen.
790 // TODO: consider extending dbrefs to have their own links ?
791 // TODO: new feature: extract dbref links from DAS servers and add the
792 // URL pattern to the list of DB name associated links in the user's
794 // for the moment - just fix up the existing feature so it displays
796 // f.setType(dbr.getSource());
797 // f.setDescription();
798 f.setValue("linkonly", Boolean.TRUE);
799 // f.setDescription("");
800 Vector newlinks = new Vector();
801 Enumeration it = f.links.elements();
802 while (it.hasMoreElements())
805 UrlLink urllink = new UrlLink(elm = (String) it.nextElement());
806 if (urllink.isValid())
808 urllink.setLabel(f.getDescription());
809 newlinks.addElement(urllink.toString());
813 // couldn't parse the link properly. Keep it anyway - just in
815 debug("couldn't parse link string - " + elm);
816 newlinks.addElement(elm);
820 seq.addSequenceFeature(f);
829 * creates a jalview sequence feature from a das feature document
832 * @return sequence feature object created using dasfeature information
834 SequenceFeature newSequenceFeature(FEATURE feat, String nickname)
843 * Different qNames for a DAS Feature - are string keys to the HashMaps in
844 * features "METHOD") || qName.equals("TYPE") || qName.equals("START") ||
845 * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") ||
846 * qName.equals("SCORE")
848 String desc = new String();
849 if (feat.getNOTE() != null)
851 for (String note : feat.getNOTE())
857 int start = 0, end = 0;
862 start = Integer.parseInt(feat.getSTART().toString());
863 } catch (Exception ex)
868 end = Integer.parseInt(feat.getEND().toString());
869 } catch (Exception ex)
874 Object scr = feat.getSCORE();
877 score = (float) Double.parseDouble(scr.toString());
880 } catch (Exception ex)
884 SequenceFeature f = new SequenceFeature(getTypeString(feat.getTYPE()),
885 desc, start, end, score, nickname);
887 if (feat.getLINK() != null)
889 for (LINK link : feat.getLINK())
891 // Do not put feature extent in link text for non-positional features
892 if (f.begin == 0 && f.end == 0)
894 f.addLink(f.getType() + " " + link.getContent() + "|"
899 f.addLink(f.getType() + " " + f.begin + "_" + f.end + " "
900 + link.getContent() + "|" + link.getHref());
906 } catch (Exception e)
908 System.out.println("ERRR " + e);
910 System.out.println("############");
911 debug("Failed to parse " + feat.toString(), e);
916 private String getTypeString(TYPE type)
918 return type.getContent();
921 public boolean isRunning()