2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.bin.Cache;
24 import jalview.datamodel.DBRefEntry;
25 import jalview.datamodel.SequenceFeature;
26 import jalview.datamodel.SequenceI;
27 import jalview.gui.AlignFrame;
28 import jalview.gui.Desktop;
29 import jalview.gui.FeatureSettings;
30 import jalview.util.MessageManager;
31 import jalview.util.UrlLink;
32 import jalview.ws.dbsources.das.api.DasSourceRegistryI;
33 import jalview.ws.dbsources.das.api.jalviewSourceI;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.Enumeration;
38 import java.util.HashMap;
39 import java.util.HashSet;
40 import java.util.Iterator;
41 import java.util.List;
44 import java.util.StringTokenizer;
45 import java.util.Vector;
47 import javax.swing.JOptionPane;
49 import org.biodas.jdas.client.FeaturesClient;
50 import org.biodas.jdas.client.adapters.features.DasGFFAdapter;
51 import org.biodas.jdas.client.adapters.features.DasGFFAdapter.GFFAdapter;
52 import org.biodas.jdas.client.threads.FeaturesClientMultipleSources;
53 import org.biodas.jdas.schema.features.ERRORSEGMENT;
54 import org.biodas.jdas.schema.features.FEATURE;
55 import org.biodas.jdas.schema.features.LINK;
56 import org.biodas.jdas.schema.features.SEGMENT;
57 import org.biodas.jdas.schema.features.TYPE;
58 import org.biodas.jdas.schema.features.UNKNOWNFEATURE;
59 import org.biodas.jdas.schema.features.UNKNOWNSEGMENT;
60 import org.biodas.jdas.schema.sources.COORDINATES;
68 public class DasSequenceFeatureFetcher
// Sequences to fetch features for; the six-argument constructor stores a
// de-duplicated copy of the array it is given.
70 SequenceI[] sequences;
// Feature settings dialog passed to the constructor; may be null, in which
// case getFeatSettings() looks at af.featureSettings instead.
74 FeatureSettings fsettings;
// NOTE(review): not referenced by any of the code visible here - confirm it
// is still needed.
76 StringBuffer sbuffer = new StringBuffer();
// DAS sources to query; de-duplicated by the constructor and, when empty,
// filled from the DAS_ACTIVE_SOURCE preference before fetching.
78 List<jalviewSourceI> selectedSources;
// When true, setGuiFetchComplete() skips the "fetching complete" progress
// bar update.
80 boolean cancelled = false;
// Debug-message overload that takes no exception.
// NOTE(review): presumably forwards to debug(mesg, null) - confirm.
82 private void debug(String mesg)
// Reports a debug message together with an optional exception. When a
// Cache.log logger is available the message and exception go to it at debug
// level.
87 private void debug(String mesg, Exception e)
89 if (Cache.log != null)
91 Cache.log.debug(mesg, e);
// NOTE(review): stderr output - presumably the fallback used when Cache.log
// is null; confirm.
95 System.err.println(mesg);
// Registry of DAS sources; fetched from Cache.getDasSourceRegistry() if it is
// still null when fetching starts.
105 private DasSourceRegistryI sourceRegistry;
// When false, every id is requested individually through a FeaturesClient;
// otherwise all sources and ids are handed to FeaturesClientMultipleSources
// in one call. Overwritten by the six-argument constructor.
107 private boolean useJDASMultiThread = true;
110 * Creates a new SequenceFeatureFetcher object. Uses default
// Convenience constructor: delegates to the six-argument constructor with
// checkDbrefs, promptFetchDbrefs and useJDasMultiThread all set to true.
// NOTE(review): selectedSources is a raw Vector handed to a parameter typed
// List<jalviewSourceI>, so its element type is not checked at compile time.
117 public DasSequenceFeatureFetcher(SequenceI[] sequences,
118 FeatureSettings fsettings, Vector selectedSources)
120 this(sequences, fsettings, selectedSources, true, true, true);
// Convenience constructor: delegates to the six-argument constructor, passing
// the caller's checkDbrefs and promptFetchDbrefs and fixing
// useJDasMultiThread to true.
123 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
124 FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
125 boolean checkDbrefs, boolean promptFetchDbrefs)
127 this(oursequences, fsettings, selectedSources2, checkDbrefs,
128 promptFetchDbrefs, true);
// Main constructor. De-duplicates the supplied sources and sequences, takes
// the alignment frame from fsettings when one is given and, when checkDbrefs
// is set and the conditions further down hold, creates a FetchDBRefs thread
// to look up UniProt accessions.
//
// oursequences        sequences to fetch features for
// fsettings           feature settings dialog, or null
// selectedSources2    DAS sources to query
// checkDbrefs         enables the UniProt reference check near the end
// promptFetchDbrefs   when true a confirmation dialog is shown first
// useJDasMultiThread  stored in the useJDASMultiThread field
131 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
132 FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
133 boolean checkDbrefs, boolean promptFetchDbrefs,
134 boolean useJDasMultiThread)
136 this.useJDASMultiThread = useJDasMultiThread;
137 this.selectedSources = new ArrayList<jalviewSourceI>();
// NOTE(review): selectedSources2 and oursequences are iterated without a
// null check, so a null argument fails here with a NullPointerException.
138 // filter both sequences and sources to eliminate duplicates
139 for (jalviewSourceI src : selectedSources2)
141 if (!selectedSources.contains(src))
143 selectedSources.add(src);
// Same de-duplication for the sequences, preserving first-seen order, then
// copied back into an array.
147 Vector sqs = new Vector();
148 for (int i = 0; i < oursequences.length; i++)
150 if (!sqs.contains(oursequences[i]))
152 sqs.addElement(oursequences[i]);
155 sequences = new SequenceI[sqs.size()];
156 for (int i = 0; i < sequences.length; i++)
158 sequences[i] = (SequenceI) sqs.elementAt(i);
// With a settings dialog available, adopt its alignment frame and switch
// sequence feature display on.
160 if (fsettings != null)
162 this.fsettings = fsettings;
163 this.af = fsettings.af;
164 af.setShowSeqFeatures(true);
// Look for selected sources that declare a "uniprot" coordinate authority.
// NOTE(review): presumably uniprotCount is incremented for each match - confirm.
166 int uniprotCount = 0;
167 for (jalviewSourceI source : selectedSources)
169 for (COORDINATES coords : source.getVersion().getCOORDINATES())
// NOTE(review): toLowerCase() uses the default locale; equalsIgnoreCase
// would give a locale-independent comparison.
171 // TODO: match UniProt coord system canonically (?) - does
172 // UniProt==uniprot==UNIPROT ?
173 if (coords.getAuthority().toLowerCase().equals("uniprot"))
// Inspect each sequence's database references for UniProt entries.
// NOTE(review): presumably this feeds the refCount tested below - confirm.
182 for (int i = 0; i < sequences.length; i++)
184 DBRefEntry[] dbref = sequences[i].getDBRefs();
187 for (int j = 0; j < dbref.length; j++)
189 if (dbref[j].getSource().equals(
190 jalview.datamodel.DBRefSource.UNIPROT))
// Only consider fetching references when asked to, when refCount is below the
// number of sequences and when at least one UniProt-based source was counted.
199 if (checkDbrefs && refCount < sequences.length && uniprotCount > 0)
// Defaults to "yes"; NOTE(review): presumably overwritten with the dialog
// result when promptFetchDbrefs is true - confirm.
202 int reply = JOptionPane.YES_OPTION;
203 if (promptFetchDbrefs)
206 .showInternalConfirmDialog(
209 .getString("info.you_want_jalview_to_find_uniprot_accessions"),
211 .getString("label.find_uniprot_accession_ids"),
212 JOptionPane.YES_NO_OPTION,
213 JOptionPane.QUESTION_MESSAGE);
216 if (reply == JOptionPane.YES_OPTION)
// The reference lookup is given its own thread.
218 Thread thread = new Thread(new FetchDBRefs());
// Launches a FetchSeqFeatures runnable on a newly created thread.
233 private void _startFetching()
236 new Thread(new FetchSeqFeatures()).start();
// Runnable started on its own thread by _startFetching(); its body calls
// setGuiFetchComplete().
239 class FetchSeqFeatures implements Runnable
245 setGuiFetchComplete();
// Runnable that the six-argument constructor wraps in a thread: it runs a
// jalview.ws.DBRefFetcher over this fetcher's sequences and calls
// setGuiFetchComplete().
249 class FetchDBRefs implements Runnable
// Flag derived from the alignment shown in the frame's viewport and passed on
// to the DBRefFetcher.
255 boolean isNuclueotide = af.getViewport().getAlignment()
257 new jalview.ws.DBRefFetcher(sequences, af, null, af.featureSettings,
258 isNuclueotide).fetchDBRefs(true);
261 setGuiFetchComplete();
266 * Spawns Fetcher threads to add features to sequences in the dataset
// Fetch routine: settles which DAS sources to use, builds one query list per
// source, runs the requests and hands each source's outcome to
// processResponse().
272 startTime = System.currentTimeMillis();
275 af.setProgressBar(MessageManager
276 .getString("status.fetching_das_sequence_features"),
279 if (sourceRegistry == null)
281 sourceRegistry = Cache.getDasSourceRegistry();
// No sources chosen explicitly: fall back to the registry sources whose
// titles are listed in the user's preferences.
283 if (selectedSources == null || selectedSources.size() == 0)
287 jalviewSourceI[] sources = sourceRegistry.getSources().toArray(
288 new jalviewSourceI[0]);
// DAS_ACTIVE_SOURCE holds the active source titles, separated by tabs.
289 String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE",
291 StringTokenizer st = new StringTokenizer(active, "\t");
// NOTE(review): raw Vector assigned to a List<jalviewSourceI> field.
292 selectedSources = new Vector();
294 while (st.hasMoreTokens())
296 token = st.nextToken();
297 for (int i = 0; i < sources.length; i++)
299 if (sources[i].getTitle().equals(token))
301 selectedSources.add(sources[i]);
306 } catch (Exception ex)
308 debug("Exception whilst setting default feature sources from registry and local preferences.",
313 if (selectedSources == null || selectedSources.size() == 0)
// Still nothing to query: report it on stdout and through the GUI.
315 System.out.println("No DAS Sources active");
317 setGuiNoDassourceActive();
321 sourcesRemaining = selectedSources.size();
// Client used by the all-at-once request path further down.
322 FeaturesClientMultipleSources fc = new FeaturesClientMultipleSources();
323 fc.setConnProps(sourceRegistry.getSessionHandler());
324 // Now sending requests one at a time to each server
// Per-source collections; they are walked in step with each other once the
// responses are in.
325 ArrayList<jalviewSourceI> srcobj = new ArrayList<jalviewSourceI>();
326 ArrayList<String> src = new ArrayList<String>();
327 List<List<String>> ids = new ArrayList<List<String>>();
328 List<List<DBRefEntry>> idobj = new ArrayList<List<DBRefEntry>>();
329 List<Map<String, SequenceI>> sqset = new ArrayList<Map<String, SequenceI>>();
330 for (jalviewSourceI _sr : selectedSources)
// Working collections for this source; slist is keyed by query id.
333 Map<String, SequenceI> slist = new HashMap<String, SequenceI>();
334 List<DBRefEntry> idob = new ArrayList<DBRefEntry>();
335 List<String> qset = new ArrayList<String>();
337 for (SequenceI seq : sequences)
339 Object[] idset = nextSequence(_sr, seq);
// nextSequence() returns { List<DBRefEntry>, List<String> }.
342 List<DBRefEntry> _idob = (List<DBRefEntry>) idset[0];
343 List<String> _qset = (List<String>) idset[1];
344 if (_idob.size() > 0)
346 // add sequence's ref for each id derived from it
347 // (space inefficient, but most unambiguous)
348 // could replace with hash with _qset values as keys.
349 Iterator<DBRefEntry> dbobj = _idob.iterator();
350 for (String q : _qset)
352 SequenceI osq = slist.get(q);
353 DBRefEntry dr = dbobj.next();
// The same id is already associated with a different sequence.
354 if (osq != null && osq != seq)
356 // skip - non-canonical query
371 src.add(_sr.getSourceURL());
// Outcome maps: looked up by source URL, then by the list of ids queried.
377 Map<String, Map<List<String>, Exception>> errors = new HashMap<String, Map<List<String>, Exception>>();
378 Map<String, Map<List<String>, DasGFFAdapter>> results = new HashMap<String, Map<List<String>, DasGFFAdapter>>();
379 if (!useJDASMultiThread)
381 Iterator<String> sources = src.iterator();
382 // iterate over each query for each source and do each one individually
383 for (List<String> idl : ids)
385 String source = sources.next();
386 FeaturesClient featuresc = new FeaturesClient(sourceRegistry
387 .getSessionHandler().getConnectionPropertyProviderFor(
389 for (String id : idl)
// Each id is sent as a single-element query list.
391 List<String> qid = Arrays.asList(new String[] { id });
394 DasGFFAdapter dga = featuresc.fetchData(source, qid);
395 Map<List<String>, DasGFFAdapter> ers = results.get(source);
399 ers = new HashMap<List<String>, DasGFFAdapter>());
402 } catch (Exception ex)
// NOTE(review): presumably the failure is recorded in the errors map for this
// source - confirm.
404 Map<List<String>, Exception> ers = errors.get(source);
408 ers = new HashMap<List<String>, Exception>());
417 // pass them all at once
418 fc.fetchData(src, ids, false, results, errors);
// Loop until the client reports that it has terminated.
420 while (!fc.isTerminated())
425 } catch (InterruptedException x)
// Hand each source its own ids, references and sequence map, together with
// whatever results or errors are stored under its URL.
431 Iterator<List<String>> idset = ids.iterator();
432 Iterator<List<DBRefEntry>> idobjset = idobj.iterator();
433 Iterator<Map<String, SequenceI>> seqset = sqset.iterator();
434 for (jalviewSourceI source : srcobj)
436 processResponse(seqset.next(), source, idset.next(), idobjset.next(),
437 results.get(source.getSourceURL()),
438 errors.get(source.getSourceURL()));
// Handles the outcome of one DAS source's request: logs a failure, or
// converts the returned FEATUREs into SequenceFeatures on the sequences that
// were queried and then calls featuresAdded().
//
// sequencemap  query id -> sequence
// jvsource     the DAS source that was queried
// ids          query ids for the source; also the lookup key into results
//              and errors
// idobj        DBRefEntry for each id, indexed in step with ids
// results      adapters returned for the source, may be null
// errors       exceptions recorded for the source, may be null
442 private void processResponse(Map<String, SequenceI> sequencemap,
443 jalviewSourceI jvsource, List<String> ids,
444 List<DBRefEntry> idobj, Map<List<String>, DasGFFAdapter> results,
445 Map<List<String>, Exception> errors)
// Sequences touched by this response (shadows the sequences field).
447 Set<SequenceI> sequences = new HashSet<SequenceI>();
448 String source = jvsource.getSourceURL();
// NOTE(review): both maps are looked up with the complete ids list, while the
// non-multithreaded request path queries with single-id lists - confirm the
// keys stored there match what is looked up here.
450 DasGFFAdapter result = (results == null) ? null : results.get(ids);
451 Exception error = (errors == null) ? null : errors.get(ids);
// NOTE(review): presumably only reached when result is null - confirm.
454 debug("das source " + source + " could not be contacted. "
455 + (error == null ? "" : error.toString()));
// Unpack the response and log a summary of what came back.
460 GFFAdapter gff = result.getGFF();
461 List<SEGMENT> segments = gff.getSegments();
462 List<ERRORSEGMENT> errorsegs = gff.getErrorSegments();
463 List<UNKNOWNFEATURE> unkfeats = gff.getUnknownFeatures();
464 List<UNKNOWNSEGMENT> unksegs = gff.getUnknownSegments();
465 debug("das source " + source + " returned " + gff.getTotal()
466 + " responses. " + (errorsegs != null ? errorsegs.size() : 0)
467 + " were incorrect segment queries, "
468 + (unkfeats != null ? unkfeats.size() : 0)
469 + " were unknown features "
470 + (unksegs != null ? unksegs.size() : 0)
471 + " were unknown segments and "
472 + (segments != null ? segments.size() : 0)
473 + " were segment responses.");
// NOTE(review): this iterator is not used by the code visible below.
474 Iterator<DBRefEntry> dbr = idobj.iterator();
475 if (segments != null)
477 for (SEGMENT seg : segments)
// Match the segment id against the ids that were sent, retrying in upper case
// when the id as returned is not among them.
479 String id = seg.getId();
480 if (ids.indexOf(id) == -1)
482 id = id.toUpperCase();
// NOTE(review): if the upper-cased id is not in ids either, indexOf() yields
// -1 and idobj.get(-1) throws IndexOutOfBoundsException - confirm whether a
// guard is needed.
484 DBRefEntry dbref = idobj.get(ids.indexOf(id));
485 SequenceI sequence = sequencemap.get(id);
486 boolean added = false;
487 sequences.add(sequence);
489 for (FEATURE feat : seg.getFEATURE())
491 // standard DAS feature-> jalview sequence feature transformation
492 SequenceFeature f = newSequenceFeature(feat,
493 jvsource.getTitle());
// Only features that parseSeqFeature() did not consume are handled here.
494 if (!parseSeqFeature(sequence, f, feat, jvsource))
// Positional features on a reference that carries a mapping are translated
// through that mapping before being added.
496 if (dbref.getMap() != null && f.getBegin() > 0
499 debug("mapping from " + f.getBegin() + " - " + f.getEnd());
500 SequenceFeature vf[] = null;
// A failure while locating the feature is logged as a warning.
504 vf = dbref.getMap().locateFeature(f);
505 } catch (Exception ex)
508 .warn("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org.");
509 Cache.log.warn("Mapping feature from " + f.getBegin()
510 + " to " + f.getEnd() + " in dbref "
511 + dbref.getAccessionId() + " in "
512 + dbref.getSource());
513 Cache.log.warn("using das Source " + source);
514 Cache.log.warn("Exception", ex);
// Add every feature produced by the mapping.
519 for (int v = 0; v < vf.length; v++)
521 debug("mapping to " + v + ": " + vf[v].getBegin()
522 + " - " + vf[v].getEnd());
523 sequence.addSequenceFeature(vf[v]);
// NOTE(review): presumably the unmapped case - the feature is added as is;
// confirm.
529 sequence.addSequenceFeature(f);
// Let the GUI know which sequences received features.
534 featuresAdded(sequences);
538 // System.out.println("No features found for " + seq.getName()
539 // + " from: " + e.getDasSource().getNickname());
// Tells the GUI that no DAS source is active: uses the
// "status.no_das_sources_active" message and, when a feature settings dialog
// can be found, calls its noDasSourceActive().
544 private void setGuiNoDassourceActive()
550 MessageManager.getString("status.no_das_sources_active"),
553 if (getFeatSettings() != null)
555 fsettings.noDasSourceActive();
560 * Update our fsettings dialog reference if we didn't have one when we were
// Gives access to the feature settings dialog; when the fsettings field is
// still null it is refreshed from af.featureSettings.
// NOTE(review): callers compare the result with null, so presumably the
// (possibly still null) fsettings field is what gets returned - confirm.
565 private FeatureSettings getFeatSettings()
567 if (fsettings == null)
571 fsettings = af.featureSettings;
// Show the "fetching cancelled" status on the alignment frame's progress bar.
581 af.setProgressBar(MessageManager
582 .getString("status.das_feature_fetching_cancelled"),
// Set to the number of selected sources when fetching starts.
588 int sourcesRemaining = 0;
// NOTE(review): presumably the state reported by isRunning() - confirm.
590 private boolean running = false;
// Brings the GUI up to date once fetching has ended: updates the progress bar
// (unless cancelled), refreshes the frame's feature settings data and
// notifies the feature settings dialog.
592 private void setGuiFetchComplete()
595 if (!cancelled && af != null)
597 // only update the progress bar if we've completed the fetch normally
598 af.setProgressBar(MessageManager
599 .getString("status.das_feature_fetching_complete"), startTime);
// Let the alignment frame's own feature settings pick up the new data.
602 if (af != null && af.featureSettings != null)
604 af.featureSettings.discoverAllFeatureData();
// getFeatSettings() may fill in fsettings, so it is consulted before the
// field is used.
607 if (getFeatSettings() != null)
609 fsettings.complete();
// Notifies the GUI that features were added to the given sequences and
// repaints the alignment when one of them is the dataset sequence of a
// sequence in the viewport's start..end range.
613 void featuresAdded(Set<SequenceI> seqs)
617 // no gui to update with features.
// Tell the feature renderer that new features exist.
620 af.getFeatureRenderer().featuresAdded();
// Range of sequence indices taken from the viewport; end is exclusive in the
// loop below.
622 int start = af.getViewport().getStartSeq();
623 int end = af.getViewport().getEndSeq();
625 for (index = start; index < end; index++)
627 for (SequenceI seq : seqs)
// Identity comparison against the dataset sequence of the aligned sequence.
629 if (seq == af.getViewport().getAlignment().getSequenceAt(index)
630 .getDatasetSequence())
632 af.alignPanel.paintAlignment(true);
// Works out which ids to query on dasSource for seq.
// Returns a two-element array: [0] the List<DBRefEntry> ids and [1] the
// List<String> of query ids. UniProt references whose coordinate system
// matches the source are tried first; if none matches, an id is derived from
// the sequence name.
640 Object[] nextSequence(jalviewSourceI dasSource, SequenceI seq)
646 DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
647 seq.getDBRefs(), new String[] {
648 // jalview.datamodel.DBRefSource.PDB,
649 jalview.datamodel.DBRefSource.UNIPROT,
650 // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord
653 // TODO: minimal list of DAS queries to make by querying with untyped ID if
654 // distinct from any typed IDs
// The two lists that are returned to the caller.
656 List<DBRefEntry> ids = new ArrayList<DBRefEntry>();
657 List<String> qstring = new ArrayList<String>();
658 boolean dasCoordSysFound = false;
// The outer loop stops at the first reference for which a matching
// coordinate system was found.
662 // do any of these ids match the source's coordinate system ?
663 for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++)
666 for (COORDINATES csys : dasSource.getVersion().getCOORDINATES())
668 if (jalview.util.DBRefUtils.isDasCoordinateSystem(
669 csys.getAuthority(), uprefs[j]))
671 debug("Launched fetcher for coordinate system "
672 + csys.getAuthority());
673 // Will have to pass any mapping information to the fetcher
674 // - the start/end for the DBRefEntry may not be the same as the
675 // sequence's start/end
677 System.out.println(seq.getName() + " "
678 + (seq.getDatasetSequence() == null) + " "
681 dasCoordSysFound = true; // breaks out of the loop
683 qstring.add(uprefs[j].getAccessionId());
687 System.out.println("IGNORE " + csys.getAuthority());
// Fallback when no reference matched the source's coordinate system.
693 if (!dasCoordSysFound)
696 // try and use the name as the sequence id
// For a name containing '|' the text after the last '|' is used, unless it is
// too short, in which case the field before it is taken.
697 if (seq.getName().indexOf("|") > -1)
699 id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1);
700 if (id.trim().length() < 4)
702 // hack - we regard a significant ID as being at least 4
703 // non-whitespace characters
704 id = seq.getName().substring(0, seq.getName().lastIndexOf("|"));
705 if (id.indexOf("|") > -1)
707 id = id.substring(id.lastIndexOf("|") + 1);
// Wrap the derived id in a fresh DBRefEntry that only has its accession set
// here.
717 DBRefEntry dbre = new DBRefEntry();
718 dbre.setAccessionId(id);
719 // Should try to call a general feature fetcher that
720 // queries many sources with name to discover applicable ID references
722 qstring.add(dbre.getAccessionId());
726 return new Object[] { ids, qstring };
730 * examine the given sequence feature to determine if it should actually be
731 * turned into sequence annotation or database cross references rather than a
732 * simple sequence feature.
735 * the sequence to annotate
737 * the jalview sequence feature generated from the DAS feature
739 * the sequence feature attributes
741 * the source that emitted the feature
742 * @return true if feature was consumed as another kind of annotation.
// Inspects a freshly converted DAS feature and handles the special types
// "protein_name" (sequence description) and "DBXREF"/"DBREF" (database
// cross-reference) instead of treating them as ordinary features.
744 protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f,
745 FEATURE feature, jalviewSourceI source)
// Keep the sequence as passed in, then walk seq down to the underlying
// dataset sequence.
747 SequenceI mseq = seq;
748 while (seq.getDatasetSequence() != null)
750 seq = seq.getDatasetSequence();
752 if (f.getType() != null)
754 String type = f.getType();
// "protein_name" features supply a description for the dataset sequence and
// for the sequence that was passed in, but only where none is set yet.
755 if (type.equalsIgnoreCase("protein_name"))
757 // parse name onto the alignment sequence or the dataset sequence.
758 if (seq.getDescription() == null
759 || seq.getDescription().trim().length() == 0)
761 // could look at the note series to pick out the first long name, for
762 // the moment just use the whole description string
763 seq.setDescription(f.getDescription());
765 if (mseq.getDescription() == null
766 || mseq.getDescription().trim().length() == 0)
768 // could look at the note series to pick out the first long name, for
769 // the moment just use the whole description string
770 mseq.setDescription(f.getDescription());
// Cross-reference features are turned into a DBRefEntry.
774 // check if source has biosapiens or other sequence ontology label
775 if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF"))
777 // try to parse the accession out
// The version is the source title; source and accession are the first two
// ':'-separated tokens of the feature description.
779 DBRefEntry dbr = new DBRefEntry();
780 dbr.setVersion(source.getTitle());
781 StringTokenizer st = new StringTokenizer(f.getDescription(), ":");
782 if (st.hasMoreTokens())
784 dbr.setSource(st.nextToken());
786 if (st.hasMoreTokens())
788 dbr.setAccessionId(st.nextToken());
// A cross-reference that carries links is also kept as a feature, flagged
// "linkonly".
792 if (f.links != null && f.links.size() > 0)
794 // feature is also appended to enable links to be seen.
795 // TODO: consider extending dbrefs to have their own links ?
796 // TODO: new feature: extract dbref links from DAS servers and add the
797 // URL pattern to the list of DB name associated links in the user's
799 // for the moment - just fix up the existing feature so it displays
801 // f.setType(dbr.getSource());
802 // f.setDescription();
803 f.setValue("linkonly", Boolean.TRUE);
804 // f.setDescription("");
// Rebuild the link list: valid links are relabelled with the feature
// description, unparseable ones are kept unchanged.
805 Vector newlinks = new Vector();
806 Enumeration it = f.links.elements();
807 while (it.hasMoreElements())
810 UrlLink urllink = new UrlLink(elm = (String) it.nextElement());
811 if (urllink.isValid())
813 urllink.setLabel(f.getDescription());
814 newlinks.addElement(urllink.toString());
818 // couldn't parse the link properly. Keep it anyway - just in
820 debug("couldn't parse link string - " + elm);
821 newlinks.addElement(elm);
// NOTE(review): newlinks is not assigned back to f in the code visible here -
// confirm f.links is updated before the feature is added.
825 seq.addSequenceFeature(f);
834 * creates a jalview sequence feature from a das feature document
837 * @return sequence feature object created using dasfeature information
// Builds a Jalview SequenceFeature from a DAS FEATURE element: type,
// description (from the NOTE entries), start, end, score and links.
// Parse problems are caught and logged rather than propagated.
839 SequenceFeature newSequenceFeature(FEATURE feat, String nickname)
848 * Different qNames for a DAS Feature - are string keys to the HashMaps in
849 * features "METHOD") || qName.equals("TYPE") || qName.equals("START") ||
850 * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") ||
851 * qName.equals("SCORE")
// The description is assembled from the feature's NOTE entries, when present.
853 String desc = new String();
854 if (feat.getNOTE() != null)
856 for (String note : feat.getNOTE())
// Feature extent; each bound is parsed on its own so that a failure in one
// does not prevent the other from being read.
862 int start = 0, end = 0;
867 start = Integer.parseInt(feat.getSTART().toString());
868 } catch (Exception ex)
873 end = Integer.parseInt(feat.getEND().toString());
874 } catch (Exception ex)
// The score is parsed as a double and narrowed to float.
879 Object scr = feat.getSCORE();
882 score = (float) Double.parseDouble(scr.toString());
885 } catch (Exception ex)
889 SequenceFeature f = new SequenceFeature(
890 getTypeString(feat.getTYPE()), desc, start, end, score,
893 if (feat.getLINK() != null)
895 for (LINK link : feat.getLINK())
897 // Do not put feature extent in link text for non-positional features
898 if (f.begin == 0 && f.end == 0)
900 f.addLink(f.getType() + " " + link.getContent() + "|"
905 f.addLink(f.getType() + " " + f.begin + "_" + f.end + " "
906 + link.getContent() + "|" + link.getHref());
912 } catch (Exception e)
// NOTE(review): the failure is printed to stdout as well as sent to debug() -
// the stdout lines look like leftover diagnostics.
914 System.out.println("ERRR " + e);
916 System.out.println("############");
917 debug("Failed to parse " + feat.toString(), e);
// Returns the text content of the given DAS TYPE element.
// NOTE(review): type is dereferenced without a null check.
922 private String getTypeString(TYPE type)
924 return type.getContent();
927 public boolean isRunning()