/*
 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)
 * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Jalview. If not, see <http://www.gnu.org/licenses/>.
 */
\r
import java.net.URL;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Vector;

import javax.swing.*;

import org.biojava.dasobert.das.*;
import org.biojava.dasobert.das2.*;
import org.biojava.dasobert.das2.io.*;
import org.biojava.dasobert.dasregistry.*;
import org.biojava.dasobert.eventmodel.*;
import jalview.bin.Cache;
import jalview.datamodel.*;
import jalview.gui.*;
import jalview.util.UrlLink;
\r
39 * @version $Revision$
\r
41 public class DasSequenceFeatureFetcher
\r
43 SequenceI[] sequences;
\r
47 FeatureSettings fsettings;
\r
49 StringBuffer sbuffer = new StringBuffer();
\r
51 Vector selectedSources;
\r
53 boolean cancelled = false;
\r
55 private void debug(String mesg)
\r
60 private void debug(String mesg, Exception e)
\r
62 if (Cache.log != null)
\r
64 Cache.log.debug(mesg, e);
\r
68 System.err.println(mesg);
\r
71 e.printStackTrace();
\r
/**
 * Creates a new DasSequenceFeatureFetcher using the default behaviour of the
 * five-argument constructor: database references are checked and the user is
 * prompted before any are fetched.
 *
 * @param sequences
 *          sequences to fetch features for
 * @param fsettings
 *          feature settings dialog to notify, or null
 * @param selectedSources
 *          DAS sources to query
 */
public DasSequenceFeatureFetcher(SequenceI[] sequences,
        FeatureSettings fsettings, Vector selectedSources)
{
  this(sequences, fsettings, selectedSources, true, true);
}
\r
92 public DasSequenceFeatureFetcher(SequenceI[] oursequences,
\r
93 FeatureSettings fsettings, Vector ourselectedSources,
\r
94 boolean checkDbrefs, boolean promptFetchDbrefs)
\r
96 this.selectedSources = new Vector();
\r
97 Enumeration sources = ourselectedSources.elements();
\r
98 // filter both sequences and sources to eliminate duplicates
\r
99 while (sources.hasMoreElements())
\r
101 Object src = sources.nextElement();
\r
102 if (!selectedSources.contains(src))
\r
104 selectedSources.addElement(src);
\r
108 Vector sqs = new Vector();
\r
109 for (int i = 0; i < oursequences.length; i++)
\r
111 if (!sqs.contains(oursequences[i]))
\r
113 sqs.addElement(oursequences[i]);
\r
116 sequences = new SequenceI[sqs.size()];
\r
117 for (int i = 0; i < sequences.length; i++)
\r
119 sequences[i] = (SequenceI) sqs.elementAt(i);
\r
121 if (fsettings != null)
\r
123 this.fsettings = fsettings;
\r
124 this.af = fsettings.af;
\r
125 af.setShowSeqFeatures(true);
\r
127 int uniprotCount = 0;
\r
128 for (int i = 0; i < selectedSources.size(); i++)
\r
130 DasSource source = (DasSource) selectedSources.elementAt(i);
\r
131 DasCoordinateSystem[] coords = source.getCoordinateSystem();
\r
132 for (int c = 0; c < coords.length; c++)
\r
134 // TODO: match UniProt coord system canonically (?) - does
\r
135 // UniProt==uniprot==UNIPROT ?
\r
136 if (coords[c].getName().indexOf("UniProt") > -1)
\r
145 for (int i = 0; i < sequences.length; i++)
\r
147 DBRefEntry[] dbref = sequences[i].getDBRef();
\r
150 for (int j = 0; j < dbref.length; j++)
\r
152 if (dbref[j].getSource().equals(
\r
153 jalview.datamodel.DBRefSource.UNIPROT))
\r
162 if (checkDbrefs && refCount < sequences.length && uniprotCount > 0)
\r
165 int reply = JOptionPane.YES_OPTION;
\r
166 if (promptFetchDbrefs)
\r
168 reply = JOptionPane
\r
169 .showInternalConfirmDialog(
\r
171 "Do you want Jalview to find\n"
\r
172 + "Uniprot Accession ids for given sequence names?",
\r
173 "Find Uniprot Accession Ids",
\r
174 JOptionPane.YES_NO_OPTION,
\r
175 JOptionPane.QUESTION_MESSAGE);
\r
178 if (reply == JOptionPane.YES_OPTION)
\r
180 Thread thread = new Thread(new FetchDBRefs());
\r
195 class FetchDBRefs implements Runnable
\r
199 new DBRefFetcher(sequences, af).fetchDBRefs(true);
\r
205 * Spawns a number of dasobert Fetcher threads to add features to sequences in
\r
208 void startFetching()
\r
211 startTime = System.currentTimeMillis();
\r
214 af.setProgressBar("Fetching DAS Sequence Features", startTime);
\r
217 if (selectedSources == null || selectedSources.size() == 0)
\r
221 DasSource[] sources = new jalview.gui.DasSourceBrowser()
\r
224 String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE",
\r
226 StringTokenizer st = new StringTokenizer(active, "\t");
\r
227 selectedSources = new Vector();
\r
229 while (st.hasMoreTokens())
\r
231 token = st.nextToken();
\r
232 for (int i = 0; i < sources.length; i++)
\r
234 if (sources[i].getNickname().equals(token))
\r
236 selectedSources.addElement(sources[i]);
\r
241 } catch (Exception ex)
\r
243 debug("Exception whilst setting default feature sources from registry and local preferences.",
\r
248 if (selectedSources == null || selectedSources.size() == 0)
\r
250 System.out.println("No DAS Sources active");
\r
252 setGuiNoDassourceActive();
\r
256 sourcesRemaining = selectedSources.size();
\r
257 // Now sending requests one at a time to each server
\r
258 for (int sourceIndex = 0; sourceIndex < selectedSources.size()
\r
259 && !cancelled; sourceIndex++)
\r
261 DasSource dasSource = (DasSource) selectedSources
\r
262 .elementAt(sourceIndex);
\r
264 nextSequence(dasSource, sequences[0]);
\r
268 private void setGuiNoDassourceActive()
\r
273 af.setProgressBar("No DAS Sources Active", startTime);
\r
275 if (getFeatSettings() != null)
\r
277 fsettings.noDasSourceActive();
\r
282 * Update our fsettings dialog reference if we didn't have one when we were
\r
283 * first initialised.
\r
285 * @return fsettings
\r
287 private FeatureSettings getFeatSettings()
\r
289 if (fsettings == null)
\r
293 fsettings = af.featureSettings;
\r
299 public void cancel()
\r
303 af.setProgressBar("DAS Feature Fetching Cancelled", startTime);
\r
308 int sourcesRemaining = 0;
\r
310 void responseComplete(DasSource dasSource, SequenceI seq)
\r
314 for (int seqIndex = 0; seqIndex < sequences.length - 1 && !cancelled; seqIndex++)
\r
316 if (sequences[seqIndex] == seq)
\r
318 nextSequence(dasSource, sequences[++seqIndex]);
\r
324 sourcesRemaining--;
\r
326 if (sourcesRemaining == 0)
\r
328 System.err.println("Fetching Complete.");
\r
329 setGuiFetchComplete();
\r
334 private void setGuiFetchComplete()
\r
337 if (!cancelled && af != null)
\r
339 // only update the progress bar if we've completed the fetch normally
\r
340 af.setProgressBar("DAS Feature Fetching Complete", startTime);
\r
343 if (af != null && af.featureSettings != null)
\r
345 af.featureSettings.setTableData();
\r
348 if (getFeatSettings() != null)
\r
350 fsettings.complete();
\r
354 void featuresAdded(SequenceI seq)
\r
358 // no gui to update with features.
\r
361 af.getFeatureRenderer().featuresAdded();
\r
363 int start = af.getViewport().getStartSeq();
\r
364 int end = af.getViewport().getEndSeq();
\r
366 for (index = start; index < end; index++)
\r
368 if (seq == af.getViewport().getAlignment().getSequenceAt(index)
\r
369 .getDatasetSequence())
\r
371 af.alignPanel.paintAlignment(true);
\r
377 void nextSequence(DasSource dasSource, SequenceI seq)
\r
381 DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs(
\r
382 seq.getDBRef(), new String[]
\r
384 // jalview.datamodel.DBRefSource.PDB,
\r
385 jalview.datamodel.DBRefSource.UNIPROT,
\r
386 // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord
\r
389 // TODO: minimal list of DAS queries to make by querying with untyped ID if
\r
390 // distinct from any typed IDs
\r
392 boolean dasCoordSysFound = false;
\r
394 if (uprefs != null)
\r
396 // do any of these ids match the source's coordinate system ?
\r
397 for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++)
\r
399 DasCoordinateSystem cs[] = dasSource.getCoordinateSystem();
\r
401 for (int csIndex = 0; csIndex < cs.length && !dasCoordSysFound; csIndex++)
\r
404 && jalview.util.DBRefUtils.isDasCoordinateSystem(
\r
405 cs[csIndex].getName(), uprefs[j]))
\r
407 debug("Launched fetcher for coordinate system "
\r
408 + cs[0].getName());
\r
409 // Will have to pass any mapping information to the fetcher
\r
410 // - the start/end for the DBRefEntry may not be the same as the
\r
411 // sequence's start/end
\r
413 System.out.println(seq.getName() + " "
\r
414 + (seq.getDatasetSequence() == null) + " "
\r
415 + dasSource.getUrl());
\r
417 dasCoordSysFound = true; // break's out of the loop
\r
418 createFeatureFetcher(seq, dasSource, uprefs[j]);
\r
421 System.out.println("IGNORE " + cs[csIndex].getName());
\r
426 if (!dasCoordSysFound)
\r
429 // try and use the name as the sequence id
\r
430 if (seq.getName().indexOf("|") > -1)
\r
432 id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1);
\r
433 if (id.trim().length() < 4)
\r
435 // hack - we regard a significant ID as being at least 4
\r
436 // non-whitespace characters
\r
437 id = seq.getName().substring(0, seq.getName().lastIndexOf("|"));
\r
438 if (id.indexOf("|") > -1)
\r
440 id = id.substring(id.lastIndexOf("|") + 1);
\r
446 id = seq.getName();
\r
450 // Should try to call a general feature fetcher that
\r
451 // queries many sources with name to discover applicable ID references
\r
452 createFeatureFetcher(seq, dasSource, id);
\r
459 * fetch and add das features to a sequence using the given source URL and
\r
460 * compatible DbRef id. new features are mapped using the DbRef mapping to the
\r
461 * local coordinate system.
\r
467 protected void createFeatureFetcher(final SequenceI seq,
\r
468 final DasSource dasSource, final DBRefEntry dbref)
\r
472 // / fetch DAS features
\r
473 final Das1Source source = new Das1Source();
\r
474 source.setUrl(dasSource.getUrl());
\r
475 source.setNickname(dasSource.getNickname());
\r
476 if (dbref == null || dbref.getAccessionId() == null
\r
477 || dbref.getAccessionId().length() < 1)
\r
479 responseComplete(dasSource, seq); // reduce thread count anyhow
\r
482 debug("new Das Feature Fetcher for " + dbref.getSource() + ":"
\r
483 + dbref.getAccessionId() + " querying " + dasSource.getUrl());
\r
484 FeatureThread fetcher = new FeatureThread(dbref.getAccessionId()
\r
485 // + ":" + start + "," + end,
\r
488 fetcher.addFeatureListener(new FeatureListener()
\r
490 public void comeBackLater(FeatureEvent e)
\r
492 responseComplete(dasSource, seq);
\r
493 debug("das source " + e.getSource().getNickname()
\r
494 + " asked us to come back in " + e.getComeBackLater()
\r
498 public void newFeatures(FeatureEvent e)
\r
501 Das1Source ds = e.getSource();
\r
503 Map[] features = e.getFeatures();
\r
504 // add features to sequence
\r
505 debug("das source " + ds.getUrl() + " returned " + features.length
\r
508 if (features.length > 0)
\r
510 for (int i = 0; i < features.length; i++)
\r
512 // standard DAS feature-> jalview sequence feature transformation
\r
513 SequenceFeature f = newSequenceFeature(features[i],
\r
514 source.getNickname());
\r
515 if (!parseSeqFeature(seq, f, features[i], source))
\r
517 if (dbref.getMap() != null && f.getBegin() > 0
\r
520 debug("mapping from " + f.getBegin() + " - " + f.getEnd());
\r
521 SequenceFeature vf[] = null;
\r
525 vf = dbref.getMap().locateFeature(f);
\r
526 } catch (Exception ex)
\r
529 .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org.");
\r
530 Cache.log.info("Mapping feature from " + f.getBegin()
\r
531 + " to " + f.getEnd() + " in dbref "
\r
532 + dbref.getAccessionId() + " in "
\r
533 + dbref.getSource());
\r
534 Cache.log.info("using das Source " + ds.getUrl());
\r
535 Cache.log.info("Exception", ex);
\r
540 for (int v = 0; v < vf.length; v++)
\r
542 debug("mapping to " + v + ": " + vf[v].getBegin()
\r
543 + " - " + vf[v].getEnd());
\r
544 seq.addSequenceFeature(vf[v]);
\r
550 seq.addSequenceFeature(f);
\r
554 featuresAdded(seq);
\r
558 // System.out.println("No features found for " + seq.getName()
\r
559 // + " from: " + e.getDasSource().getNickname());
\r
561 responseComplete(dasSource, seq);
\r
571 protected void createFeatureFetcher(final SequenceI seq,
\r
572 final DasSource dasSource, String id)
\r
575 // / fetch DAS features
\r
576 final Das1Source source = new Das1Source();
\r
577 source.setUrl(dasSource.getUrl());
\r
578 source.setNickname(dasSource.getNickname());
\r
584 if (id != null && id.length() > 0)
\r
586 debug("new Das Feature Fetcher for " + id + " querying "
\r
587 + dasSource.getUrl());
\r
588 FeatureThread fetcher = new FeatureThread(id
\r
589 // + ":" + start + "," + end,
\r
592 fetcher.addFeatureListener(new FeatureListener()
\r
594 public void comeBackLater(FeatureEvent e)
\r
596 responseComplete(dasSource, seq);
\r
597 debug("das source " + e.getSource().getNickname()
\r
598 + " asked us to come back in " + e.getComeBackLater()
\r
602 public void newFeatures(FeatureEvent e)
\r
605 Das1Source ds = e.getSource();
\r
607 Map[] features = e.getFeatures();
\r
608 // add features to sequence
\r
609 debug("das source " + ds.getUrl() + " returned "
\r
610 + features.length + " features");
\r
612 if (features.length > 0)
\r
614 for (int i = 0; i < features.length; i++)
\r
616 // standard DAS feature-> jalview sequence feature transformation
\r
617 SequenceFeature f = newSequenceFeature(features[i],
\r
618 source.getNickname());
\r
619 if (!parseSeqFeature(seq, f, features[i], source))
\r
621 // just add as a simple sequence feature
\r
622 seq.addSequenceFeature(f);
\r
626 featuresAdded(seq);
\r
630 // System.out.println("No features found for " + seq.getName()
\r
631 // + " from: " + e.getDasSource().getNickname());
\r
633 responseComplete(dasSource, seq);
\r
644 // invalid fetch - indicate it is finished.
\r
645 debug("Skipping empty ID for querying " + dasSource.getUrl());
\r
646 responseComplete(dasSource, seq);
\r
651 * examine the given sequence feature to determine if it should actually be
\r
652 * turned into sequence annotation or database cross references rather than a
\r
653 * simple sequence feature.
\r
656 * the sequence to annotate
\r
658 * the jalview sequence feature generated from the DAS feature
\r
660 * the sequence feature attributes
\r
662 * the source that emitted the feature
\r
663 * @return true if feature was consumed as another kind of annotation.
\r
665 protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f,
\r
666 Map map, Das1Source source)
\r
668 SequenceI mseq = seq;
\r
669 while (seq.getDatasetSequence() != null)
\r
671 seq = seq.getDatasetSequence();
\r
673 if (f.getType() != null)
\r
675 String type = f.getType();
\r
676 if (type.equalsIgnoreCase("protein_name"))
\r
678 // parse name onto the alignment sequence or the dataset sequence.
\r
679 if (seq.getDescription() == null
\r
680 || seq.getDescription().trim().length() == 0)
\r
682 // could look at the note series to pick out the first long name, for
\r
683 // the moment just use the whole description string
\r
684 seq.setDescription(f.getDescription());
\r
686 if (mseq.getDescription() == null
\r
687 || mseq.getDescription().trim().length() == 0)
\r
689 // could look at the note series to pick out the first long name, for
\r
690 // the moment just use the whole description string
\r
691 mseq.setDescription(f.getDescription());
\r
695 // check if source has biosapiens or other sequence ontology label
\r
696 if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF"))
\r
698 // try to parse the accession out
\r
700 DBRefEntry dbr = new DBRefEntry();
\r
701 dbr.setVersion(source.getNickname());
\r
702 StringTokenizer st = new StringTokenizer(f.getDescription(), ":");
\r
703 if (st.hasMoreTokens())
\r
705 dbr.setSource(st.nextToken());
\r
707 if (st.hasMoreTokens())
\r
709 dbr.setAccessionId(st.nextToken());
\r
713 if (f.links != null && f.links.size() > 0)
\r
715 // feature is also appended to enable links to be seen.
\r
716 // TODO: consider extending dbrefs to have their own links ?
\r
717 // TODO: new feature: extract dbref links from DAS servers and add the
\r
718 // URL pattern to the list of DB name associated links in the user's
\r
720 // for the moment - just fix up the existing feature so it displays
\r
722 // f.setType(dbr.getSource());
\r
723 // f.setDescription();
\r
724 f.setValue("linkonly", Boolean.TRUE);
\r
725 // f.setDescription("");
\r
726 Vector newlinks = new Vector();
\r
727 Enumeration it = f.links.elements();
\r
728 while (it.hasMoreElements())
\r
731 UrlLink urllink = new UrlLink(elm = (String) it.nextElement());
\r
732 if (urllink.isValid())
\r
734 urllink.setLabel(f.getDescription());
\r
735 newlinks.addElement(urllink.toString());
\r
739 // couldn't parse the link properly. Keep it anyway - just in
\r
741 debug("couldn't parse link string - " + elm);
\r
742 newlinks.addElement(elm);
\r
745 f.links = newlinks;
\r
746 seq.addSequenceFeature(f);
\r
755 * creates a jalview sequence feature from a das feature document
\r
757 * @param dasfeature
\r
758 * @return sequence feature object created using dasfeature information
\r
760 SequenceFeature newSequenceFeature(Map dasfeature, String nickname)
\r
762 if (dasfeature == null)
\r
769 * Different qNames for a DAS Feature - are string keys to the HashMaps in
\r
770 * features "METHOD") || qName.equals("TYPE") || qName.equals("START") ||
\r
771 * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") ||
\r
772 * qName.equals("SCORE")
\r
774 String desc = new String();
\r
775 if (dasfeature.containsKey("NOTE"))
\r
777 desc += (String) dasfeature.get("NOTE");
\r
780 int start = 0, end = 0;
\r
785 start = Integer.parseInt(dasfeature.get("START").toString());
\r
786 } catch (Exception ex)
\r
791 end = Integer.parseInt(dasfeature.get("END").toString());
\r
792 } catch (Exception ex)
\r
797 Object scr = dasfeature.get("SCORE");
\r
800 score = (float) Double.parseDouble(scr.toString());
\r
803 } catch (Exception ex)
\r
807 SequenceFeature f = new SequenceFeature(
\r
808 (String) dasfeature.get("TYPE"), desc, start, end, score,
\r
811 if (dasfeature.containsKey("LINK"))
\r
813 // Do not put feature extent in link text for non-positional features
\r
814 if (f.begin == 0 && f.end == 0)
\r
816 f.addLink(f.getType() + "|" + dasfeature.get("LINK"));
\r
820 f.addLink(f.getType() + " " + f.begin + "_" + f.end + "|"
\r
821 + dasfeature.get("LINK"));
\r
826 } catch (Exception e)
\r
828 System.out.println("ERRR " + e);
\r
829 e.printStackTrace();
\r
830 System.out.println("############");
\r
831 debug("Failed to parse " + dasfeature.toString(), e);
\r
837 * query the default DAS Source Registry for sources. Uses value of jalview
\r
838 * property DAS_REGISTRY_URL and the DasSourceBrowser.DEFAULT_REGISTRY if that
\r
841 * @return list of sources
\r
843 public static DasSource[] getDASSources()
\r
846 String registryURL = jalview.bin.Cache.getDefault("DAS_REGISTRY_URL",
\r
847 DasSourceBrowser.DEFAULT_REGISTRY);
\r
848 return getDASSources(registryURL);
\r
852 * query the given URL for DasSources.
\r
854 * @param registryURL
\r
855 * return sources from registryURL
\r
857 public static DasSource[] getDASSources(String registryURL)
\r
859 DasSourceReaderImpl reader = new DasSourceReaderImpl();
\r
863 URL url = new URL(registryURL);
\r
865 DasSource[] sources = reader.readDasSource(url);
\r
867 List das1sources = new ArrayList();
\r
868 for (int i = 0; i < sources.length; i++)
\r
870 DasSource ds = sources[i];
\r
871 if (ds instanceof Das2Source)
\r
873 Das2Source d2s = (Das2Source) ds;
\r
874 if (d2s.hasDas1Capabilities())
\r
876 Das1Source d1s = DasSourceConverter.toDas1Source(d2s);
\r
877 das1sources.add(d1s);
\r
881 else if (ds instanceof Das1Source)
\r
883 das1sources.add((Das1Source) ds);
\r
887 return (Das1Source[]) das1sources.toArray(new Das1Source[das1sources
\r
889 } catch (Exception ex)
\r
891 System.err.println("Failed to contact DAS1 registry at "
\r
893 ex.printStackTrace();
\r