2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
20 import jalview.bin.Cache;
21 import jalview.datamodel.DBRefEntry;
22 import jalview.datamodel.SequenceFeature;
23 import jalview.datamodel.SequenceI;
24 import jalview.gui.AlignFrame;
25 import jalview.gui.Desktop;
26 import jalview.gui.FeatureSettings;
27 import jalview.util.UrlLink;
28 import jalview.ws.dbsources.das.api.DasSourceRegistryI;
29 import jalview.ws.dbsources.das.api.jalviewSourceI;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Enumeration;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.Iterator;
37 import java.util.List;
40 import java.util.StringTokenizer;
41 import java.util.Vector;
43 import javax.swing.JOptionPane;
45 import org.biodas.jdas.client.FeaturesClient;
46 import org.biodas.jdas.client.adapters.features.DasGFFAdapter;
47 import org.biodas.jdas.client.adapters.features.DasGFFAdapter.GFFAdapter;
48 import org.biodas.jdas.client.threads.FeaturesClientMultipleSources;
49 import org.biodas.jdas.schema.features.ERRORSEGMENT;
50 import org.biodas.jdas.schema.features.FEATURE;
51 import org.biodas.jdas.schema.features.LINK;
52 import org.biodas.jdas.schema.features.SEGMENT;
53 import org.biodas.jdas.schema.features.TYPE;
54 import org.biodas.jdas.schema.features.UNKNOWNFEATURE;
55 import org.biodas.jdas.schema.features.UNKNOWNSEGMENT;
56 import org.biodas.jdas.schema.sources.COORDINATES;
// Fetches sequence features from DAS sources for a set of sequences and adds them to those sequences.
// NOTE(review): in this view the lines between the numbered statements (braces, blank lines, some declarations) are not visible.
64 public class DasSequenceFeatureFetcher
// Sequences to annotate; the six-argument constructor stores a de-duplicated copy of its input here.
66 SequenceI[] sequences;
// Feature settings dialog; may be null (it is null-checked in the constructor and in getFeatSettings()).
70 FeatureSettings fsettings;
// NOTE(review): not referenced anywhere in the visible lines of this file.
72 StringBuffer sbuffer = new StringBuffer();
// DAS sources to query; when empty at fetch time it is refilled from the DAS_ACTIVE_SOURCE preference.
74 List<jalviewSourceI> selectedSources;
// When true, setGuiFetchComplete() does not overwrite the progress bar with the "complete" message.
76 boolean cancelled = false;
// Logs a debug message that has no associated exception.
// NOTE(review): the body is not visible here - presumably it delegates to debug(String, Exception); confirm.
78 private void debug(String mesg)
// Logs a debug message together with an exception.
// mesg - text to log; e - exception handed to the logger alongside the message.
83 private void debug(String mesg, Exception e)
// Use the application logger when one has been configured.
85 if (Cache.log != null)
87 Cache.log.debug(mesg, e);
// Plain stderr output - presumably the fallback branch for when Cache.log is null (the branch keyword is not visible here).
91 System.err.println(mesg);
// Registry of known DAS sources; obtained from Cache.getDasSourceRegistry() at fetch time if still null.
101 private DasSourceRegistryI sourceRegistry;
// Selects the query strategy: when false each id is fetched on its own with a FeaturesClient;
// the all-at-once FeaturesClientMultipleSources call is presumably the alternative branch (branch keyword not visible).
103 private boolean useJDASMultiThread = true;
// Convenience constructor: delegates to the six-argument constructor with checkDbrefs,
// promptFetchDbrefs and useJDasMultiThread all set to true.
// NOTE(review): selectedSources is a raw Vector passed where List<jalviewSourceI> is declared - an unchecked conversion.
106 * Creates a new SequenceFeatureFetcher object. Uses default
113 public DasSequenceFeatureFetcher(SequenceI[] sequences,
114 FeatureSettings fsettings, Vector selectedSources)
116 this(sequences, fsettings, selectedSources, true, true, true);
// Convenience constructor: forwards all arguments to the six-argument constructor and
// passes true for useJDasMultiThread.
119 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
120 FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
121 boolean checkDbrefs, boolean promptFetchDbrefs)
123 this(oursequences, fsettings, selectedSources2, checkDbrefs,
124 promptFetchDbrefs, true);
// Main constructor. De-duplicates the given sources and sequences, switches on feature display
// when a feature settings dialog is supplied, and decides whether UniProt references should be
// looked up (optionally after asking the user) before features are fetched.
// oursequences       - sequences to annotate (duplicates are dropped)
// fsettings          - feature settings dialog, may be null
// selectedSources2   - DAS sources to query (duplicates are dropped)
// checkDbrefs        - enables the "are UniProt refs missing?" check
// promptFetchDbrefs  - when true the user is asked before refs are fetched
// useJDasMultiThread - stored in useJDASMultiThread; selects the query strategy used at fetch time
// NOTE(review): several statements of this constructor (counter increments, the refCount
// declaration, the thread start and any else-branch) are on lines not visible in this view.
127 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
128 FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
129 boolean checkDbrefs, boolean promptFetchDbrefs,
130 boolean useJDasMultiThread)
132 this.useJDASMultiThread = useJDasMultiThread;
133 this.selectedSources = new ArrayList<jalviewSourceI>();
134 // filter both sequences and sources to eliminate duplicates
// NOTE(review): selectedSources2 is iterated without a visible null check.
135 for (jalviewSourceI src : selectedSources2)
137 if (!selectedSources.contains(src))
139 selectedSources.add(src);
// Same de-duplication for sequences, preserving first-seen order (raw Vector, linear contains()).
143 Vector sqs = new Vector();
144 for (int i = 0; i < oursequences.length; i++)
146 if (!sqs.contains(oursequences[i]))
148 sqs.addElement(oursequences[i]);
// Copy the unique sequences into the array field.
151 sequences = new SequenceI[sqs.size()];
152 for (int i = 0; i < sequences.length; i++)
154 sequences[i] = (SequenceI) sqs.elementAt(i);
// With a settings dialog available, take its alignment frame and make sure features are shown.
156 if (fsettings != null)
158 this.fsettings = fsettings;
159 this.af = fsettings.af;
160 af.setShowSeqFeatures(true);
// Look for selected sources that declare a coordinate system whose authority is "uniprot"
// (case-insensitive via toLowerCase); presumably uniprotCount is incremented on a match - the increment is not visible here.
162 int uniprotCount = 0;
163 for (jalviewSourceI source : selectedSources)
165 for (COORDINATES coords : source.getVersion().getCOORDINATES())
167 // TODO: match UniProt coord system canonically (?) - does
168 // UniProt==uniprot==UNIPROT ?
169 if (coords.getAuthority().toLowerCase().equals("uniprot"))
// Scan each sequence's database references for a UNIPROT source; presumably this feeds refCount
// (its declaration and increment, and any null guard on dbref, are not visible here).
178 for (int i = 0; i < sequences.length; i++)
180 DBRefEntry[] dbref = sequences[i].getDBRef();
183 for (int j = 0; j < dbref.length; j++)
185 if (dbref[j].getSource().equals(
186 jalview.datamodel.DBRefSource.UNIPROT))
// Only consider fetching references when checking is enabled, refCount is below the number of
// sequences, and uniprotCount is positive.
195 if (checkDbrefs && refCount < sequences.length && uniprotCount > 0)
// Default answer is YES so that the non-prompting path proceeds with the reference fetch.
198 int reply = JOptionPane.YES_OPTION;
199 if (promptFetchDbrefs)
202 .showInternalConfirmDialog(
204 "Do you want Jalview to find\n"
205 + "Uniprot Accession ids for given sequence names?",
206 "Find Uniprot Accession Ids",
207 JOptionPane.YES_NO_OPTION,
208 JOptionPane.QUESTION_MESSAGE);
// On YES, a thread is created for the FetchDBRefs runnable (its start() call is not visible here).
211 if (reply == JOptionPane.YES_OPTION)
213 Thread thread = new Thread(new FetchDBRefs());
// Starts feature fetching in the background by launching a new thread that runs FetchSeqFeatures.
228 private void _startFetching()
231 new Thread(new FetchSeqFeatures()).start();
// Runnable used by _startFetching() for the background feature fetch.
// NOTE(review): the run() signature and any statement preceding the call below are not visible here -
// presumably the fetch itself is invoked before the GUI is told it is complete; confirm.
234 class FetchSeqFeatures implements Runnable
239 setGuiFetchComplete();
// Runnable created by the constructor when database references should be looked up.
// NOTE(review): the run() signature and the line between the two calls below are not visible here.
243 class FetchDBRefs implements Runnable
// Fetch database references for the sequences (the boolean argument's meaning is defined by DBRefFetcher).
248 new DBRefFetcher(sequences, af).fetchDBRefs(true);
// Signal the GUI that fetching has finished.
250 setGuiFetchComplete();
// Builds the DAS feature queries for every selected source, executes them (one id at a time or
// all at once, depending on useJDASMultiThread) and passes each source's results to processResponse.
// NOTE(review): the method signature and a number of statements/braces are on lines not visible in this view.
255 * Spawns Fetcher threads to add features to sequences in the dataset
// startTime is passed as the second argument to every af.setProgressBar call in this class.
261 startTime = System.currentTimeMillis();
264 af.setProgressBar("Fetching DAS Sequence Features", startTime);
// Obtain the source registry from the application cache on first use.
266 if (sourceRegistry == null)
268 sourceRegistry = Cache.getDasSourceRegistry();
// No explicit selection: fall back to the sources named in the tab-separated DAS_ACTIVE_SOURCE preference.
270 if (selectedSources == null || selectedSources.size() == 0)
274 jalviewSourceI[] sources = sourceRegistry.getSources().toArray(
275 new jalviewSourceI[0]);
276 String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE",
278 StringTokenizer st = new StringTokenizer(active, "\t");
// NOTE(review): raw Vector assigned to a List<jalviewSourceI> field.
279 selectedSources = new Vector();
// Select every registry source whose title equals one of the preference tokens.
281 while (st.hasMoreTokens())
283 token = st.nextToken();
284 for (int i = 0; i < sources.length; i++)
286 if (sources[i].getTitle().equals(token))
288 selectedSources.add(sources[i]);
// Any failure while building the default selection is only logged.
293 } catch (Exception ex)
295 debug("Exception whilst setting default feature sources from registry and local preferences.",
// Still nothing to query: report it on stdout and in the GUI (presumably followed by an early return - not visible here).
300 if (selectedSources == null || selectedSources.size() == 0)
302 System.out.println("No DAS Sources active");
304 setGuiNoDassourceActive();
308 sourcesRemaining = selectedSources.size();
309 FeaturesClientMultipleSources fc = new FeaturesClientMultipleSources();
310 fc.setConnProps(sourceRegistry.getSessionHandler());
311 // Now sending requests one at a time to each server
// Parallel collections: srcobj, src, ids, idobj and sqset are later iterated in lockstep per source.
312 ArrayList<jalviewSourceI> srcobj = new ArrayList<jalviewSourceI>();
313 ArrayList<String> src = new ArrayList<String>();
314 List<List<String>> ids = new ArrayList<List<String>>();
315 List<List<DBRefEntry>> idobj = new ArrayList<List<DBRefEntry>>();
316 List<Map<String, SequenceI>> sqset = new ArrayList<Map<String, SequenceI>>();
// For each source, derive the query ids of every sequence and remember which sequence each id belongs to.
317 for (jalviewSourceI _sr : selectedSources)
320 Map<String, SequenceI> slist = new HashMap<String, SequenceI>();
321 List<DBRefEntry> idob = new ArrayList<DBRefEntry>();
322 List<String> qset = new ArrayList<String>();
324 for (SequenceI seq : sequences)
// nextSequence returns a two-element array: [0] the DBRefEntry list, [1] the matching query strings.
326 Object[] idset = nextSequence(_sr, seq);
329 List<DBRefEntry> _idob = (List<DBRefEntry>) idset[0];
330 List<String> _qset = (List<String>) idset[1];
331 if (_idob.size() > 0)
333 // add sequence's ref for each id derived from it
334 // (space inefficient, but most unambiguous)
335 // could replace with hash with _qset values as keys.
// Walk the query strings and their DBRefEntry objects pairwise.
336 Iterator<DBRefEntry> dbobj = _idob.iterator();
337 for (String q : _qset)
339 SequenceI osq = slist.get(q);
340 DBRefEntry dr = dbobj.next();
// A query id already mapped to a different sequence is not registered again.
341 if (osq != null && osq != seq)
343 // skip - non-canonical query
// Record the source URL (the additions to the other parallel lists are not visible here).
358 src.add(_sr.getSourceURL());
// Outcome maps: source URL -> (id list used for the query -> exception / GFF adapter).
364 Map<String, Map<List<String>, Exception>> errors = new HashMap<String, Map<List<String>, Exception>>();
365 Map<String, Map<List<String>, DasGFFAdapter>> results = new HashMap<String, Map<List<String>, DasGFFAdapter>>();
// Sequential strategy: one FeaturesClient per source and one request per id.
366 if (!useJDASMultiThread)
368 Iterator<String> sources = src.iterator();
369 // iterate over each query for each source and do each one individually
370 for (List<String> idl : ids)
372 String source = sources.next();
373 FeaturesClient featuresc = new FeaturesClient(sourceRegistry
374 .getSessionHandler().getConnectionPropertyProviderFor(
376 for (String id : idl)
// Each request uses a single-element id list.
378 List<String> qid = Arrays.asList(new String[]
382 DasGFFAdapter dga = featuresc.fetchData(source, qid);
// Create the per-source result map on demand (the put calls are not visible here).
383 Map<List<String>, DasGFFAdapter> ers = results.get(source);
387 ers = new HashMap<List<String>, DasGFFAdapter>());
// A failed request is recorded in the per-source error map instead of aborting the loop.
390 } catch (Exception ex)
392 Map<List<String>, Exception> ers = errors.get(source);
396 ers = new HashMap<List<String>, Exception>());
405 // pass them all at once
406 fc.fetchData(src, ids, false, results, errors);
// Poll until the multi-source client reports completion (the wait statement and the interrupt handling are not visible here).
408 while (!fc.isTerminated())
413 } catch (InterruptedException x)
// Hand each source's id list, reference list, sequence map and outcomes to processResponse.
419 Iterator<List<String>> idset = ids.iterator();
420 Iterator<List<DBRefEntry>> idobjset = idobj.iterator();
421 Iterator<Map<String, SequenceI>> seqset = sqset.iterator();
422 for (jalviewSourceI source : srcobj)
424 processResponse(seqset.next(), source, idset.next(), idobjset.next(),
425 results.get(source.getSourceURL()),
426 errors.get(source.getSourceURL()));
// Converts the response of one DAS source into sequence features and attaches them to the sequences.
// sequencemap - query id -> sequence the id was derived from
// jvsource    - the source that was queried
// ids         - query ids sent to the source
// idobj       - DBRefEntry objects, index-aligned with ids
// results     - id list -> GFF adapter for this source, may be null
// errors      - id list -> exception for this source, may be null
// NOTE(review): several statements/braces of this method are on lines not visible in this view.
430 private void processResponse(Map<String, SequenceI> sequencemap,
431 jalviewSourceI jvsource, List<String> ids,
432 List<DBRefEntry> idobj, Map<List<String>, DasGFFAdapter> results,
433 Map<List<String>, Exception> errors)
// Sequences touched by this response; passed to featuresAdded() at the end.
435 Set<SequenceI> sequences = new HashSet<SequenceI>();
436 String source = jvsource.getSourceURL();
// Outcomes are looked up by the complete id list.
// NOTE(review): the sequential fetch path creates single-element id lists as keys, so this lookup
// may miss when ids holds more than one entry - confirm against the put calls not visible here.
438 DasGFFAdapter result = (results == null) ? null : results.get(ids);
439 Exception error = (errors == null) ? null : errors.get(ids);
// Logged when the source could not be contacted (the guarding condition is not visible here).
442 debug("das source " + source + " could not be contacted. "
443 + (error == null ? "" : error.toString()));
448 GFFAdapter gff = result.getGFF();
449 List<SEGMENT> segments = gff.getSegments();
450 List<ERRORSEGMENT> errorsegs = gff.getErrorSegments();
451 List<UNKNOWNFEATURE> unkfeats = gff.getUnknownFeatures();
452 List<UNKNOWNSEGMENT> unksegs = gff.getUnknownSegments();
// Summarise the response; each list may be null and is then counted as 0.
453 debug("das source " + source + " returned " + gff.getTotal()
454 + " responses. " + (errorsegs != null ? errorsegs.size() : 0)
455 + " were incorrect segment queries, "
456 + (unkfeats != null ? unkfeats.size() : 0)
457 + " were unknown features "
458 + (unksegs != null ? unksegs.size() : 0)
459 + " were unknown segments and "
460 + (segments != null ? segments.size() : 0)
461 + " were segment responses.");
// NOTE(review): this iterator is not used in any visible line.
462 Iterator<DBRefEntry> dbr = idobj.iterator();
463 if (segments != null)
465 for (SEGMENT seg : segments)
467 String id = seg.getId();
// If the returned segment id is not among the queried ids, retry with its upper-case form.
468 if (ids.indexOf(id) == -1)
470 id = id.toUpperCase();
// NOTE(review): if the upper-cased id is also absent, indexOf yields -1 and idobj.get(-1) would throw,
// unless a guard exists on a line not visible here.
472 DBRefEntry dbref = idobj.get(ids.indexOf(id));
473 SequenceI sequence = sequencemap.get(id);
474 boolean added = false;
475 sequences.add(sequence);
477 for (FEATURE feat : seg.getFEATURE())
479 // standard DAS feature-> jalview sequence feature transformation
480 SequenceFeature f = newSequenceFeature(feat,
481 jvsource.getTitle());
// Features consumed by parseSeqFeature (it returns true) are not added again here.
482 if (!parseSeqFeature(sequence, f, feat, jvsource))
// Positional feature with a mapping on the reference: translate its coordinates via the mapping
// (the rest of this condition is not visible here).
484 if (dbref.getMap() != null && f.getBegin() > 0
487 debug("mapping from " + f.getBegin() + " - " + f.getEnd());
488 SequenceFeature vf[] = null;
492 vf = dbref.getMap().locateFeature(f);
// Mapping failures are logged at info level with the feature range, the reference and the source.
493 } catch (Exception ex)
496 .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org.");
497 Cache.log.info("Mapping feature from " + f.getBegin()
498 + " to " + f.getEnd() + " in dbref "
499 + dbref.getAccessionId() + " in "
500 + dbref.getSource());
501 Cache.log.info("using das Source " + source);
502 Cache.log.info("Exception", ex);
// Add every mapped feature to the sequence.
507 for (int v = 0; v < vf.length; v++)
509 debug("mapping to " + v + ": " + vf[v].getBegin()
510 + " - " + vf[v].getEnd());
511 sequence.addSequenceFeature(vf[v]);
// Otherwise the feature is added unchanged (presumably the else-branch of the mapping test; the keyword is not visible).
517 sequence.addSequenceFeature(f);
// Notify the GUI about the sequences that received features.
522 featuresAdded(sequences);
526 // System.out.println("No features found for " + seq.getName()
527 // + " from: " + e.getDasSource().getNickname());
// Tells the GUI that no DAS source is active: updates the progress bar and notifies the
// feature settings dialog when one can be found.
// NOTE(review): any guard around the progress-bar call (e.g. a null check on af) is not visible here.
532 private void setGuiNoDassourceActive()
537 af.setProgressBar("No DAS Sources Active", startTime);
539 if (getFeatSettings() != null)
541 fsettings.noDasSourceActive();
// Returns the feature settings dialog, picking it up from the alignment frame if this
// object has none yet. The return statement and any guard on af are not visible here.
546 * Update our fsettings dialog reference if we didn't have one when we were
551 private FeatureSettings getFeatSettings()
553 if (fsettings == null)
557 fsettings = af.featureSettings;
// Shows the "cancelled" message in the alignment frame's progress bar.
// NOTE(review): the enclosing method's signature and its other statements are not visible here -
// presumably this is the cancel handler that also sets the cancelled flag; confirm.
567 af.setProgressBar("DAS Feature Fetching Cancelled", startTime);
// Set to the number of selected sources when a fetch begins; no decrement appears in the visible lines.
572 int sourcesRemaining = 0;
// NOTE(review): never written in the visible lines; presumably reported by isRunning() - confirm.
574 private boolean running = false;
// Updates the GUI after a fetch has ended: progress bar, feature settings table and
// the feature settings dialog's completion hook.
576 private void setGuiFetchComplete()
579 if (!cancelled && af != null)
581 // only update the progress bar if we've completed the fetch normally
582 af.setProgressBar("DAS Feature Fetching Complete", startTime);
// Refresh the table of the alignment frame's feature settings dialog, when present.
585 if (af != null && af.featureSettings != null)
587 af.featureSettings.setTableData();
// getFeatSettings() may populate fsettings as a side effect before it is used below.
590 if (getFeatSettings() != null)
592 fsettings.complete();
// Notifies the feature renderer that features were added and repaints the alignment when
// one of the given sequences is the dataset sequence of a row in the viewport's current range.
// seqs - sequences that received new features
// NOTE(review): the no-GUI guard, the declaration of index and any early exit after the repaint are not visible here.
596 void featuresAdded(Set<SequenceI> seqs)
600 // no gui to update with features.
603 af.getFeatureRenderer().featuresAdded();
// Range of alignment rows reported by the viewport (start inclusive, end exclusive in the loop below).
605 int start = af.getViewport().getStartSeq();
606 int end = af.getViewport().getEndSeq();
608 for (index = start; index < end; index++)
610 for (SequenceI seq : seqs)
// Identity comparison against the dataset sequence behind the alignment row.
612 if (seq == af.getViewport().getAlignment().getSequenceAt(index)
613 .getDatasetSequence())
615 af.alignPanel.paintAlignment(true);
// Works out which ids to query on the given DAS source for one sequence.
// dasSource - source whose coordinate systems are matched against the sequence's references
// seq       - sequence to derive query ids from
// Returns (per the caller's casts) a two-element array: [0] List<DBRefEntry>, [1] List<String> of query ids;
// the return statement itself is not visible here.
623 Object[] nextSequence(jalviewSourceI dasSource, SequenceI seq)
// Keep only the sequence's references from the listed sources (UNIPROT is the only uncommented entry visible).
627 DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
628 seq.getDBRef(), new String[]
630 // jalview.datamodel.DBRefSource.PDB,
631 jalview.datamodel.DBRefSource.UNIPROT,
632 // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord
635 // TODO: minimal list of DAS queries to make by querying with untyped ID if
636 // distinct from any typed IDs
638 List<DBRefEntry> ids = new ArrayList<DBRefEntry>();
639 List<String> qstring = new ArrayList<String>();
640 boolean dasCoordSysFound = false;
644 // do any of these ids match the source's coordinate system ?
// NOTE(review): any null guard on uprefs is not visible here.
645 for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++)
648 for (COORDINATES csys : dasSource.getVersion().getCOORDINATES())
650 if (jalview.util.DBRefUtils.isDasCoordinateSystem(
651 csys.getAuthority(), uprefs[j]))
653 debug("Launched fetcher for coordinate system "
654 + csys.getAuthority());
655 // Will have to pass any mapping information to the fetcher
656 // - the start/end for the DBRefEntry may not be the same as the
657 // sequence's start/end
// Diagnostic output on stdout (the remainder of the expression is not visible here).
659 System.out.println(seq.getName() + " "
660 + (seq.getDatasetSequence() == null) + " "
663 dasCoordSysFound = true; // break's out of the loop
// The accession id of the matching reference becomes a query id.
665 qstring.add(uprefs[j].getAccessionId());
668 System.out.println("IGNORE " + csys.getAuthority());
// No reference matched: derive an id from the sequence name instead.
673 if (!dasCoordSysFound)
676 // try and use the name as the sequence id
// For "a|b|c" style names take the text after the last '|'.
677 if (seq.getName().indexOf("|") > -1)
679 id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1);
680 if (id.trim().length() < 4)
682 // hack - we regard a significant ID as being at least 4
683 // non-whitespace characters
// Too short: fall back to the field before the last '|'.
684 id = seq.getName().substring(0, seq.getName().lastIndexOf("|"));
685 if (id.indexOf("|") > -1)
687 id = id.substring(id.lastIndexOf("|") + 1);
// Wrap the derived id in a fresh DBRefEntry and use its accession as the query id.
697 DBRefEntry dbre = new DBRefEntry();
698 dbre.setAccessionId(id);
699 // Should try to call a general feature fetcher that
700 // queries many sources with name to discover applicable ID references
702 qstring.add(dbre.getAccessionId());
// Inspects a feature's type and, for "protein_name" and "DBXREF"/"DBREF" features, turns it into a
// sequence description or database reference instead of (or in addition to) a plain feature.
// NOTE(review): the return statements and several braces/statements are on lines not visible in this view.
711 * examine the given sequence feature to determine if it should actually be
712 * turned into sequence annotation or database cross references rather than a
713 * simple sequence feature.
716 * the sequence to annotate
718 * the jalview sequence feature generated from the DAS feature
720 * the sequence feature attributes
722 * the source that emitted the feature
723 * @return true if feature was consumed as another kind of annotation.
725 protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f,
726 FEATURE feature, jalviewSourceI source)
// mseq keeps the sequence as passed in; seq is then walked down to the root dataset sequence.
728 SequenceI mseq = seq;
729 while (seq.getDatasetSequence() != null)
731 seq = seq.getDatasetSequence();
733 if (f.getType() != null)
735 String type = f.getType();
// "protein_name": use the feature description as the sequence description where none is set yet.
736 if (type.equalsIgnoreCase("protein_name"))
738 // parse name onto the alignment sequence or the dataset sequence.
739 if (seq.getDescription() == null
740 || seq.getDescription().trim().length() == 0)
742 // could look at the note series to pick out the first long name, for
743 // the moment just use the whole description string
744 seq.setDescription(f.getDescription());
// Same rule for the sequence that was originally passed in.
746 if (mseq.getDescription() == null
747 || mseq.getDescription().trim().length() == 0)
749 // could look at the note series to pick out the first long name, for
750 // the moment just use the whole description string
751 mseq.setDescription(f.getDescription());
755 // check if source has biosapiens or other sequence ontology label
756 if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF"))
758 // try to parse the accession out
// Build a reference from a "source:accession" description; the version is set to the DAS source title.
760 DBRefEntry dbr = new DBRefEntry();
761 dbr.setVersion(source.getTitle());
762 StringTokenizer st = new StringTokenizer(f.getDescription(), ":");
763 if (st.hasMoreTokens())
765 dbr.setSource(st.nextToken());
767 if (st.hasMoreTokens())
769 dbr.setAccessionId(st.nextToken());
// When the feature carries links it is also kept as a feature so the links remain reachable.
773 if (f.links != null && f.links.size() > 0)
775 // feature is also appended to enable links to be seen.
776 // TODO: consider extending dbrefs to have their own links ?
777 // TODO: new feature: extract dbref links from DAS servers and add the
778 // URL pattern to the list of DB name associated links in the user's
780 // for the moment - just fix up the existing feature so it displays
782 // f.setType(dbr.getSource());
783 // f.setDescription();
784 f.setValue("linkonly", Boolean.TRUE);
785 // f.setDescription("");
// Rebuild the link list: valid links are relabelled with the feature description, others kept as-is.
786 Vector newlinks = new Vector();
787 Enumeration it = f.links.elements();
788 while (it.hasMoreElements())
791 UrlLink urllink = new UrlLink(elm = (String) it.nextElement());
792 if (urllink.isValid())
794 urllink.setLabel(f.getDescription());
795 newlinks.addElement(urllink.toString());
799 // couldn't parse the link properly. Keep it anyway - just in
801 debug("couldn't parse link string - " + elm);
802 newlinks.addElement(elm);
// The fixed-up feature is added to the dataset sequence (the assignment of newlinks back to f is not visible here).
806 seq.addSequenceFeature(f);
// Builds a Jalview SequenceFeature from a DAS FEATURE element: description from its notes,
// start/end/score parsed from their text form, and one link entry per LINK element.
// feat     - DAS feature to convert
// nickname - passed by the caller as the source title; its use is on a line not visible here
// NOTE(review): the catch bodies, the score declaration and the return statements are not visible in this view.
815 * creates a jalview sequence feature from a das feature document
818 * @return sequence feature object created using dasfeature information
820 SequenceFeature newSequenceFeature(FEATURE feat, String nickname)
829 * Different qNames for a DAS Feature - are string keys to the HashMaps in
830 * features "METHOD") || qName.equals("TYPE") || qName.equals("START") ||
831 * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") ||
832 * qName.equals("SCORE")
// Concatenate all notes, without separators, to form the description.
834 String desc = new String();
835 if (feat.getNOTE() != null)
837 for (String note : feat.getNOTE())
839 desc += (String) note;
// Start and end default to 0; each is parsed separately with its own catch clause (catch bodies not visible).
843 int start = 0, end = 0;
848 start = Integer.parseInt(feat.getSTART().toString());
849 } catch (Exception ex)
854 end = Integer.parseInt(feat.getEND().toString());
855 } catch (Exception ex)
// Score is parsed as a double and narrowed to float.
860 Object scr = feat.getSCORE();
863 score = (float) Double.parseDouble(scr.toString());
866 } catch (Exception ex)
870 SequenceFeature f = new SequenceFeature(
871 getTypeString(feat.getTYPE()), desc, start, end, score,
874 if (feat.getLINK() != null)
876 for (LINK link : feat.getLINK())
878 // Do not put feature extent in link text for non-positional features
// Link text format is "<type> [<begin>_<end> ]<content>|<href>".
879 if (f.begin == 0 && f.end == 0)
881 f.addLink(f.getType() + " " + link.getContent() + "|"
886 f.addLink(f.getType() + " " + f.begin + "_" + f.end + " "
887 + link.getContent() + "|" + link.getHref());
// Any failure during conversion is reported on stdout and through debug().
893 } catch (Exception e)
895 System.out.println("ERRR " + e);
897 System.out.println("############");
898 debug("Failed to parse " + feat.toString(), e);
// Returns the text content of a DAS TYPE element; used as the feature type by newSequenceFeature.
// NOTE(review): no null check on type is visible - a feature without a TYPE would cause a NullPointerException here.
903 private String getTypeString(TYPE type)
905 return type.getContent();
908 public boolean isRunning()