2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
\r
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
\r
5 * This file is part of Jalview.
\r
7 * Jalview is free software: you can redistribute it and/or
\r
8 * modify it under the terms of the GNU General Public License
\r
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
\r
11 * Jalview is distributed in the hope that it will be useful, but
\r
12 * WITHOUT ANY WARRANTY; without even the implied warranty
\r
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
\r
14 * PURPOSE. See the GNU General Public License for more details.
\r
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
\r
20 import jalview.bin.Cache;
\r
21 import jalview.datamodel.DBRefEntry;
\r
22 import jalview.datamodel.SequenceFeature;
\r
23 import jalview.datamodel.SequenceI;
\r
24 import jalview.gui.AlignFrame;
\r
25 import jalview.gui.Desktop;
\r
26 import jalview.gui.FeatureSettings;
\r
27 import jalview.util.UrlLink;
\r
28 import jalview.ws.dbsources.das.api.DasSourceRegistryI;
\r
29 import jalview.ws.dbsources.das.api.jalviewSourceI;
\r
31 import java.util.ArrayList;
\r
32 import java.util.Enumeration;
\r
33 import java.util.HashMap;
\r
34 import java.util.HashSet;
\r
35 import java.util.Iterator;
\r
36 import java.util.List;
\r
37 import java.util.Map;
\r
38 import java.util.Set;
\r
39 import java.util.StringTokenizer;
\r
40 import java.util.Vector;
\r
42 import javax.swing.JOptionPane;
\r
44 import org.biodas.jdas.client.adapters.features.DasGFFAdapter;
\r
45 import org.biodas.jdas.client.adapters.features.DasGFFAdapter.GFFAdapter;
\r
46 import org.biodas.jdas.client.threads.FeaturesClientMultipleSources;
\r
47 import org.biodas.jdas.schema.features.ERRORSEGMENT;
\r
48 import org.biodas.jdas.schema.features.FEATURE;
\r
49 import org.biodas.jdas.schema.features.LINK;
\r
50 import org.biodas.jdas.schema.features.SEGMENT;
\r
51 import org.biodas.jdas.schema.features.TYPE;
\r
52 import org.biodas.jdas.schema.features.UNKNOWNFEATURE;
\r
53 import org.biodas.jdas.schema.features.UNKNOWNSEGMENT;
\r
54 import org.biodas.jdas.schema.sources.COORDINATES;
\r
60 * @version $Revision$
\r
62 public class DasSequenceFeatureFetcher
\r
64 SequenceI[] sequences;
\r
68 FeatureSettings fsettings;
\r
70 StringBuffer sbuffer = new StringBuffer();
\r
72 List<jalviewSourceI> selectedSources;
\r
74 boolean cancelled = false;
\r
76 private void debug(String mesg)
\r
81 private void debug(String mesg, Exception e)
\r
83 if (Cache.log != null)
\r
85 Cache.log.debug(mesg, e);
\r
89 System.err.println(mesg);
\r
92 e.printStackTrace();
\r
99 private DasSourceRegistryI sourceRegistry;
\r
/**
 * Creates a new DasSequenceFeatureFetcher with the default database
 * reference handling.
 * <p>
 * This is a convenience overload: it forwards its arguments unchanged to the
 * five-argument constructor, passing {@code true} for both
 * {@code checkDbrefs} and {@code promptFetchDbrefs}.
 *
 * @param sequences
 *          the sequences that features are to be retrieved for
 * @param fsettings
 *          the feature settings dialog associated with this fetch; forwarded
 *          as-is to the five-argument constructor
 * @param selectedSources
 *          the DAS sources to query (untyped for backwards compatibility;
 *          presumably holds jalviewSourceI instances - confirm against
 *          callers)
 */
public DasSequenceFeatureFetcher(SequenceI[] sequences,
        FeatureSettings fsettings, Vector selectedSources)
{
  // checkDbrefs = true, promptFetchDbrefs = true
  this(sequences, fsettings, selectedSources, true, true);
}
\r
115 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
\r
116 FeatureSettings fsettings, List<jalviewSourceI> selectedSources2,
\r
117 boolean checkDbrefs, boolean promptFetchDbrefs)
\r
119 this.selectedSources = new ArrayList<jalviewSourceI>();
\r
120 // filter both sequences and sources to eliminate duplicates
\r
121 for (jalviewSourceI src : selectedSources2)
\r
123 if (!selectedSources.contains(src))
\r
125 selectedSources.add(src);
\r
129 Vector sqs = new Vector();
\r
130 for (int i = 0; i < oursequences.length; i++)
\r
132 if (!sqs.contains(oursequences[i]))
\r
134 sqs.addElement(oursequences[i]);
\r
137 sequences = new SequenceI[sqs.size()];
\r
138 for (int i = 0; i < sequences.length; i++)
\r
140 sequences[i] = (SequenceI) sqs.elementAt(i);
\r
142 if (fsettings != null)
\r
144 this.fsettings = fsettings;
\r
145 this.af = fsettings.af;
\r
146 af.setShowSeqFeatures(true);
\r
148 int uniprotCount = 0;
\r
149 for (jalviewSourceI source : selectedSources)
\r
151 for (COORDINATES coords : source.getVersion().getCOORDINATES())
\r
153 // TODO: match UniProt coord system canonically (?) - does
\r
154 // UniProt==uniprot==UNIPROT ?
\r
155 if (coords.getAuthority().toLowerCase().equals("uniprot"))
\r
164 for (int i = 0; i < sequences.length; i++)
\r
166 DBRefEntry[] dbref = sequences[i].getDBRef();
\r
169 for (int j = 0; j < dbref.length; j++)
\r
171 if (dbref[j].getSource().equals(
\r
172 jalview.datamodel.DBRefSource.UNIPROT))
\r
181 if (checkDbrefs && refCount < sequences.length && uniprotCount > 0)
\r
184 int reply = JOptionPane.YES_OPTION;
\r
185 if (promptFetchDbrefs)
\r
187 reply = JOptionPane
\r
188 .showInternalConfirmDialog(
\r
190 "Do you want Jalview to find\n"
\r
191 + "Uniprot Accession ids for given sequence names?",
\r
192 "Find Uniprot Accession Ids",
\r
193 JOptionPane.YES_NO_OPTION,
\r
194 JOptionPane.QUESTION_MESSAGE);
\r
197 if (reply == JOptionPane.YES_OPTION)
\r
199 Thread thread = new Thread(new FetchDBRefs());
\r
213 private void _startFetching()
\r
215 new Thread(new FetchSeqFeatures()).start();
\r
217 class FetchSeqFeatures implements Runnable
\r
222 setGuiFetchComplete();
\r
225 class FetchDBRefs implements Runnable
\r
229 new DBRefFetcher(sequences, af).fetchDBRefs(true);
\r
231 setGuiFetchComplete();
\r
236 * Spawns Fetcher threads to add features to sequences in the dataset
\r
238 void startFetching()
\r
241 startTime = System.currentTimeMillis();
\r
244 af.setProgressBar("Fetching DAS Sequence Features", startTime);
\r
246 if (sourceRegistry == null)
\r
248 sourceRegistry = Cache.getDasSourceRegistry();
\r
250 if (selectedSources == null || selectedSources.size() == 0)
\r
254 jalviewSourceI[] sources = sourceRegistry.getSources().toArray(
\r
255 new jalviewSourceI[0]);
\r
256 String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE",
\r
258 StringTokenizer st = new StringTokenizer(active, "\t");
\r
259 selectedSources = new Vector();
\r
261 while (st.hasMoreTokens())
\r
263 token = st.nextToken();
\r
264 for (int i = 0; i < sources.length; i++)
\r
266 if (sources[i].getTitle().equals(token))
\r
268 selectedSources.add(sources[i]);
\r
273 } catch (Exception ex)
\r
275 debug("Exception whilst setting default feature sources from registry and local preferences.",
\r
280 if (selectedSources == null || selectedSources.size() == 0)
\r
282 System.out.println("No DAS Sources active");
\r
284 setGuiNoDassourceActive();
\r
288 sourcesRemaining = selectedSources.size();
\r
289 FeaturesClientMultipleSources fc = new FeaturesClientMultipleSources();
\r
290 fc.setConnProps(sourceRegistry.getSessionHandler());
\r
291 // Now sending requests one at a time to each server
\r
292 ArrayList<jalviewSourceI> srcobj = new ArrayList<jalviewSourceI>();
\r
293 ArrayList<String> src = new ArrayList<String>();
\r
294 List<List<String>> ids = new ArrayList<List<String>>();
\r
295 List<List<DBRefEntry>> idobj = new ArrayList<List<DBRefEntry>>();
\r
296 List<Map<String, SequenceI>> sqset = new ArrayList<Map<String, SequenceI>>();
\r
297 for (jalviewSourceI _sr : selectedSources)
\r
300 Map<String, SequenceI> slist = new HashMap<String, SequenceI>();
\r
301 List<DBRefEntry> idob = new ArrayList<DBRefEntry>();
\r
302 List<String> qset = new ArrayList<String>();
\r
304 for (SequenceI seq : sequences)
\r
306 Object[] idset = nextSequence(_sr, seq);
\r
309 List<DBRefEntry> _idob = (List<DBRefEntry>) idset[0];
\r
310 List<String> _qset = (List<String>) idset[1];
\r
311 if (_idob.size() > 0)
\r
313 // add sequence's ref for each id derived from it
\r
314 // (space inefficient, but most unambiguous)
\r
315 // could replace with hash with _qset values as keys.
\r
316 Iterator<DBRefEntry> dbobj = _idob.iterator();
\r
317 for (String q : _qset)
\r
319 SequenceI osq = slist.get(q);
\r
320 DBRefEntry dr = dbobj.next();
\r
321 if (osq != null && osq != seq)
\r
323 // skip - non-canonical query
\r
335 if (idob.size() > 0)
\r
338 src.add(_sr.getSourceURL());
\r
344 Map<String, Map<List<String>, Exception>> errors = new HashMap<String, Map<List<String>, Exception>>();
\r
345 Map<String, Map<List<String>, DasGFFAdapter>> results = new HashMap<String, Map<List<String>, DasGFFAdapter>>();
\r
346 fc.fetchData(src, ids, false, results, errors);
\r
348 while (!fc.isTerminated())
\r
353 } catch (InterruptedException x)
\r
358 Iterator<List<String>> idset = ids.iterator();
\r
359 Iterator<List<DBRefEntry>> idobjset = idobj.iterator();
\r
360 Iterator<Map<String, SequenceI>> seqset = sqset.iterator();
\r
361 for (jalviewSourceI source : srcobj)
\r
363 processResponse(seqset.next(), source, idset.next(), idobjset.next(),
\r
364 results.get(source.getSourceURL()),
\r
365 errors.get(source.getSourceURL()));
\r
369 private void processResponse(Map<String, SequenceI> sequencemap,
\r
370 jalviewSourceI jvsource, List<String> ids,
\r
371 List<DBRefEntry> idobj, Map<List<String>, DasGFFAdapter> results,
\r
372 Map<List<String>, Exception> errors)
\r
374 Set<SequenceI> sequences = new HashSet<SequenceI>();
\r
375 String source = jvsource.getSourceURL();
\r
376 // process features
\r
377 DasGFFAdapter result = (results == null) ? null : results.get(ids);
\r
378 Exception error = (errors == null) ? null : errors.get(ids);
\r
379 if (result == null)
\r
381 debug("das source " + source + " could not be contacted. "
\r
382 + (error == null ? "" : error.toString()));
\r
387 GFFAdapter gff = result.getGFF();
\r
388 List<SEGMENT> segments = gff.getSegments();
\r
389 List<ERRORSEGMENT> errorsegs = gff.getErrorSegments();
\r
390 List<UNKNOWNFEATURE> unkfeats = gff.getUnknownFeatures();
\r
391 List<UNKNOWNSEGMENT> unksegs = gff.getUnknownSegments();
\r
392 debug("das source " + source + " returned " + gff.getTotal()
\r
393 + " responses. " + (errorsegs != null ? errorsegs.size() : 0)
\r
394 + " were incorrect segment queries, "
\r
395 + (unkfeats != null ? unkfeats.size() : 0)
\r
396 + " were unknown features "
\r
397 + (unksegs != null ? unksegs.size() : 0)
\r
398 + " were unknown segments and "
\r
399 + (segments != null ? segments.size() : 0)
\r
400 + " were segment responses.");
\r
401 Iterator<DBRefEntry> dbr = idobj.iterator();
\r
402 if (segments != null)
\r
404 for (SEGMENT seg : segments)
\r
406 String id = seg.getId();
\r
407 DBRefEntry dbref = idobj.get(ids.indexOf(id));
\r
408 SequenceI sequence = sequencemap.get(id);
\r
409 boolean added = false;
\r
410 sequences.add(sequence);
\r
412 for (FEATURE feat : seg.getFEATURE())
\r
414 // standard DAS feature-> jalview sequence feature transformation
\r
415 SequenceFeature f = newSequenceFeature(feat, jvsource.getTitle());
\r
416 if (!parseSeqFeature(sequence, f, feat, jvsource))
\r
418 if (dbref.getMap() != null && f.getBegin() > 0
\r
421 debug("mapping from " + f.getBegin() + " - " + f.getEnd());
\r
422 SequenceFeature vf[] = null;
\r
426 vf = dbref.getMap().locateFeature(f);
\r
427 } catch (Exception ex)
\r
430 .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org.");
\r
431 Cache.log.info("Mapping feature from " + f.getBegin()
\r
432 + " to " + f.getEnd() + " in dbref "
\r
433 + dbref.getAccessionId() + " in "
\r
434 + dbref.getSource());
\r
435 Cache.log.info("using das Source " + source);
\r
436 Cache.log.info("Exception", ex);
\r
441 for (int v = 0; v < vf.length; v++)
\r
443 debug("mapping to " + v + ": " + vf[v].getBegin()
\r
444 + " - " + vf[v].getEnd());
\r
445 sequence.addSequenceFeature(vf[v]);
\r
451 sequence.addSequenceFeature(f);
\r
456 featuresAdded(sequences);
\r
460 // System.out.println("No features found for " + seq.getName()
\r
461 // + " from: " + e.getDasSource().getNickname());
\r
466 private void setGuiNoDassourceActive()
\r
471 af.setProgressBar("No DAS Sources Active", startTime);
\r
473 if (getFeatSettings() != null)
\r
475 fsettings.noDasSourceActive();
\r
480 * Update our fsettings dialog reference if we didn't have one when we were
\r
481 * first initialised.
\r
483 * @return fsettings
\r
485 private FeatureSettings getFeatSettings()
\r
487 if (fsettings == null)
\r
491 fsettings = af.featureSettings;
\r
497 public void cancel()
\r
501 af.setProgressBar("DAS Feature Fetching Cancelled", startTime);
\r
506 int sourcesRemaining = 0;
\r
508 private void setGuiFetchComplete()
\r
511 if (!cancelled && af != null)
\r
513 // only update the progress bar if we've completed the fetch normally
\r
514 af.setProgressBar("DAS Feature Fetching Complete", startTime);
\r
517 if (af != null && af.featureSettings != null)
\r
519 af.featureSettings.setTableData();
\r
522 if (getFeatSettings() != null)
\r
524 fsettings.complete();
\r
528 void featuresAdded(Set<SequenceI> seqs)
\r
532 // no gui to update with features.
\r
535 af.getFeatureRenderer().featuresAdded();
\r
537 int start = af.getViewport().getStartSeq();
\r
538 int end = af.getViewport().getEndSeq();
\r
540 for (index = start; index < end; index++)
\r
542 for (SequenceI seq : seqs)
\r
544 if (seq == af.getViewport().getAlignment().getSequenceAt(index)
\r
545 .getDatasetSequence())
\r
547 af.alignPanel.paintAlignment(true);
\r
555 Object[] nextSequence(jalviewSourceI dasSource, SequenceI seq)
\r
559 DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
\r
560 seq.getDBRef(), new String[]
\r
562 // jalview.datamodel.DBRefSource.PDB,
\r
563 jalview.datamodel.DBRefSource.UNIPROT,
\r
564 // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord
\r
567 // TODO: minimal list of DAS queries to make by querying with untyped ID if
\r
568 // distinct from any typed IDs
\r
570 List<DBRefEntry> ids = new ArrayList<DBRefEntry>();
\r
571 List<String> qstring = new ArrayList<String>();
\r
572 boolean dasCoordSysFound = false;
\r
574 if (uprefs != null)
\r
576 // do any of these ids match the source's coordinate system ?
\r
577 for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++)
\r
580 for (COORDINATES csys : dasSource.getVersion().getCOORDINATES())
\r
582 if (jalview.util.DBRefUtils.isDasCoordinateSystem(
\r
583 csys.getAuthority(), uprefs[j]))
\r
585 debug("Launched fetcher for coordinate system "
\r
586 + csys.getAuthority());
\r
587 // Will have to pass any mapping information to the fetcher
\r
588 // - the start/end for the DBRefEntry may not be the same as the
\r
589 // sequence's start/end
\r
591 System.out.println(seq.getName() + " "
\r
592 + (seq.getDatasetSequence() == null) + " "
\r
595 dasCoordSysFound = true; // break's out of the loop
\r
596 ids.add(uprefs[j]);
\r
597 qstring.add(uprefs[j].getAccessionId());
\r
600 System.out.println("IGNORE " + csys.getAuthority());
\r
605 if (!dasCoordSysFound)
\r
608 // try and use the name as the sequence id
\r
609 if (seq.getName().indexOf("|") > -1)
\r
611 id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1);
\r
612 if (id.trim().length() < 4)
\r
614 // hack - we regard a significant ID as being at least 4
\r
615 // non-whitespace characters
\r
616 id = seq.getName().substring(0, seq.getName().lastIndexOf("|"));
\r
617 if (id.indexOf("|") > -1)
\r
619 id = id.substring(id.lastIndexOf("|") + 1);
\r
625 id = seq.getName();
\r
629 DBRefEntry dbre = new DBRefEntry();
\r
630 dbre.setAccessionId(id);
\r
631 // Should try to call a general feature fetcher that
\r
632 // queries many sources with name to discover applicable ID references
\r
634 qstring.add(dbre.getAccessionId());
\r
638 return new Object[]
\r
643 * examine the given sequence feature to determine if it should actually be
\r
644 * turned into sequence annotation or database cross references rather than a
\r
645 * simple sequence feature.
\r
648 * the sequence to annotate
\r
650 * the jalview sequence feature generated from the DAS feature
\r
652 * the sequence feature attributes
\r
654 * the source that emitted the feature
\r
655 * @return true if feature was consumed as another kind of annotation.
\r
657 protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f,
\r
658 FEATURE feature, jalviewSourceI source)
\r
660 SequenceI mseq = seq;
\r
661 while (seq.getDatasetSequence() != null)
\r
663 seq = seq.getDatasetSequence();
\r
665 if (f.getType() != null)
\r
667 String type = f.getType();
\r
668 if (type.equalsIgnoreCase("protein_name"))
\r
670 // parse name onto the alignment sequence or the dataset sequence.
\r
671 if (seq.getDescription() == null
\r
672 || seq.getDescription().trim().length() == 0)
\r
674 // could look at the note series to pick out the first long name, for
\r
675 // the moment just use the whole description string
\r
676 seq.setDescription(f.getDescription());
\r
678 if (mseq.getDescription() == null
\r
679 || mseq.getDescription().trim().length() == 0)
\r
681 // could look at the note series to pick out the first long name, for
\r
682 // the moment just use the whole description string
\r
683 mseq.setDescription(f.getDescription());
\r
687 // check if source has biosapiens or other sequence ontology label
\r
688 if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF"))
\r
690 // try to parse the accession out
\r
692 DBRefEntry dbr = new DBRefEntry();
\r
693 dbr.setVersion(source.getTitle());
\r
694 StringTokenizer st = new StringTokenizer(f.getDescription(), ":");
\r
695 if (st.hasMoreTokens())
\r
697 dbr.setSource(st.nextToken());
\r
699 if (st.hasMoreTokens())
\r
701 dbr.setAccessionId(st.nextToken());
\r
705 if (f.links != null && f.links.size() > 0)
\r
707 // feature is also appended to enable links to be seen.
\r
708 // TODO: consider extending dbrefs to have their own links ?
\r
709 // TODO: new feature: extract dbref links from DAS servers and add the
\r
710 // URL pattern to the list of DB name associated links in the user's
\r
712 // for the moment - just fix up the existing feature so it displays
\r
714 // f.setType(dbr.getSource());
\r
715 // f.setDescription();
\r
716 f.setValue("linkonly", Boolean.TRUE);
\r
717 // f.setDescription("");
\r
718 Vector newlinks = new Vector();
\r
719 Enumeration it = f.links.elements();
\r
720 while (it.hasMoreElements())
\r
723 UrlLink urllink = new UrlLink(elm = (String) it.nextElement());
\r
724 if (urllink.isValid())
\r
726 urllink.setLabel(f.getDescription());
\r
727 newlinks.addElement(urllink.toString());
\r
731 // couldn't parse the link properly. Keep it anyway - just in
\r
733 debug("couldn't parse link string - " + elm);
\r
734 newlinks.addElement(elm);
\r
737 f.links = newlinks;
\r
738 seq.addSequenceFeature(f);
\r
747 * creates a jalview sequence feature from a das feature document
\r
750 * @return sequence feature object created using dasfeature information
\r
752 SequenceFeature newSequenceFeature(FEATURE feat, String nickname)
\r
761 * Different qNames for a DAS Feature - are string keys to the HashMaps in
\r
762 * features "METHOD") || qName.equals("TYPE") || qName.equals("START") ||
\r
763 * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") ||
\r
764 * qName.equals("SCORE")
\r
766 String desc = new String();
\r
767 if (feat.getNOTE() != null)
\r
769 for (String note : feat.getNOTE())
\r
771 desc += (String) note;
\r
775 int start = 0, end = 0;
\r
780 start = Integer.parseInt(feat.getSTART().toString());
\r
781 } catch (Exception ex)
\r
786 end = Integer.parseInt(feat.getEND().toString());
\r
787 } catch (Exception ex)
\r
792 Object scr = feat.getSCORE();
\r
795 score = (float) Double.parseDouble(scr.toString());
\r
798 } catch (Exception ex)
\r
802 SequenceFeature f = new SequenceFeature(
\r
803 getTypeString(feat.getTYPE()), desc, start, end, score,
\r
806 if (feat.getLINK() != null)
\r
808 for (LINK link : feat.getLINK())
\r
810 // Do not put feature extent in link text for non-positional features
\r
811 if (f.begin == 0 && f.end == 0)
\r
813 f.addLink(f.getType() + " " + link.getContent() + "|"
\r
818 f.addLink(f.getType() + " " + f.begin + "_" + f.end + " "
\r
819 + link.getContent() + "|" + link.getHref());
\r
825 } catch (Exception e)
\r
827 System.out.println("ERRR " + e);
\r
828 e.printStackTrace();
\r
829 System.out.println("############");
\r
830 debug("Failed to parse " + feat.toString(), e);
\r
835 private String getTypeString(TYPE type)
\r
837 return type.getContent();
\r