X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=blobdiff_plain;f=src%2Fjalview%2Fws%2FDasSequenceFeatureFetcher.java;h=fd4d09230a9a13880692e056038cede8d9184adc;hp=e3f74782b08301de33caa373acb9688df01902f9;hb=797df64fa2a0a30773d0f48f5494d4155e5a8be3;hpb=a32b2c8d6f9d68da9fdcd07e2557c62c90fed630 diff --git a/src/jalview/ws/DasSequenceFeatureFetcher.java b/src/jalview/ws/DasSequenceFeatureFetcher.java index e3f7478..fd4d092 100644 --- a/src/jalview/ws/DasSequenceFeatureFetcher.java +++ b/src/jalview/ws/DasSequenceFeatureFetcher.java @@ -1,20 +1,19 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ package jalview.ws; @@ -31,10 +30,11 @@ import org.biojava.dasobert.eventmodel.*; import jalview.bin.Cache; import jalview.datamodel.*; import jalview.gui.*; +import jalview.util.UrlLink; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ @@ -51,47 +51,78 @@ public class DasSequenceFeatureFetcher Vector selectedSources; boolean cancelled = false; + private void debug(String mesg) { debug(mesg, null); } + private void debug(String mesg, Exception e) { - if (Cache.log!=null) + if (Cache.log != null) { Cache.log.debug(mesg, e); - } else { + } + else + { System.err.println(mesg); - if (e!=null) - { - e.printStackTrace(); + if (e != null) + { + e.printStackTrace(); } } } + long startTime; /** - * Creates a new SequenceFeatureFetcher object. - * Uses default - * - * @param align DOCUMENT ME! - * @param ap DOCUMENT ME! + * Creates a new SequenceFeatureFetcher object. Uses default + * + * @param align + * DOCUMENT ME! + * @param ap + * DOCUMENT ME! */ public DasSequenceFeatureFetcher(SequenceI[] sequences, FeatureSettings fsettings, Vector selectedSources) { this(sequences, fsettings, selectedSources, true, true); } - public DasSequenceFeatureFetcher(SequenceI[] sequences, - FeatureSettings fsettings, Vector selectedSources, boolean checkDbrefs, boolean promptFetchDbrefs) + + public DasSequenceFeatureFetcher(SequenceI[] oursequences, + FeatureSettings fsettings, Vector ourselectedSources, + boolean checkDbrefs, boolean promptFetchDbrefs) { - this.selectedSources = selectedSources; - this.sequences = sequences; - if (fsettings!=null) + this.selectedSources = new Vector(); + Enumeration sources = ourselectedSources.elements(); + // filter both sequences and sources to eliminate duplicates + while (sources.hasMoreElements()) + { + Object src = sources.nextElement(); + if (!selectedSources.contains(src)) + { + selectedSources.addElement(src); + } + ; + } + Vector sqs = new Vector(); + for (int i = 0; i < oursequences.length; i++) + { + if (!sqs.contains(oursequences[i])) + { + sqs.addElement(oursequences[i]); + } + } + sequences = new SequenceI[sqs.size()]; + for (int i = 0; i < sequences.length; i++) + { + sequences[i] = (SequenceI) sqs.elementAt(i); + } + if (fsettings != null) { this.fsettings = fsettings; this.af = fsettings.af; - af.getViewport().setShowSequenceFeatures(true); + af.setShowSeqFeatures(true); } int uniprotCount = 0; for (int i = 0; i < selectedSources.size(); i++) @@ -100,7 +131,8 @@ public class DasSequenceFeatureFetcher DasCoordinateSystem[] coords = source.getCoordinateSystem(); for (int c = 0; c < coords.length; c++) { - // TODO: match UniProt coord system canonically (?) - does UniProt==uniprot==UNIPROT ? + // TODO: match UniProt coord system canonically (?) - does + // UniProt==uniprot==UNIPROT ? if (coords[c].getName().indexOf("UniProt") > -1) { uniprotCount++; @@ -133,13 +165,15 @@ public class DasSequenceFeatureFetcher int reply = JOptionPane.YES_OPTION; if (promptFetchDbrefs) { - reply = JOptionPane.showInternalConfirmDialog(Desktop.desktop, - "Do you want Jalview to find\n" - + "Uniprot Accession ids for given sequence names?", - "Find Uniprot Accession Ids", JOptionPane.YES_NO_OPTION, - JOptionPane.QUESTION_MESSAGE); + reply = JOptionPane + .showInternalConfirmDialog( + Desktop.desktop, + "Do you want Jalview to find\n" + + "Uniprot Accession ids for given sequence names?", + "Find Uniprot Accession Ids", + JOptionPane.YES_NO_OPTION, + JOptionPane.QUESTION_MESSAGE); } - if (reply == JOptionPane.YES_OPTION) { @@ -168,37 +202,46 @@ public class DasSequenceFeatureFetcher } /** - * Spawns a number of dasobert Fetcher threads to add features to sequences in the dataset + * Spawns a number of dasobert Fetcher threads to add features to sequences in + * the dataset */ void startFetching() { cancelled = false; startTime = System.currentTimeMillis(); - if (af!=null) + if (af != null) { af.setProgressBar("Fetching DAS Sequence Features", startTime); } - DasSource[] sources = new jalview.gui.DasSourceBrowser().getDASSource(); - if (selectedSources == null || selectedSources.size() == 0) { - String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE", - "uniprot"); - StringTokenizer st = new StringTokenizer(active, "\t"); - Vector selectedSources = new Vector(); - String token; - while (st.hasMoreTokens()) + try { - token = st.nextToken(); - for (int i = 0; i < sources.length; i++) + DasSource[] sources = new jalview.gui.DasSourceBrowser() + .getDASSource(); + + String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE", + "uniprot"); + StringTokenizer st = new StringTokenizer(active, "\t"); + selectedSources = new Vector(); + String token; + while (st.hasMoreTokens()) { - if (sources[i].getNickname().equals(token)) + token = st.nextToken(); + for (int i = 0; i < sources.length; i++) { - selectedSources.addElement(sources[i]); - break; + if (sources[i].getNickname().equals(token)) + { + selectedSources.addElement(sources[i]); + break; + } } } + } catch (Exception ex) + { + debug("Exception whilst setting default feature sources from registry and local preferences.", + ex); } } @@ -211,7 +254,7 @@ public class DasSequenceFeatureFetcher } sourcesRemaining = selectedSources.size(); - //Now sending requests one at a time to each server + // Now sending requests one at a time to each server for (int sourceIndex = 0; sourceIndex < selectedSources.size() && !cancelled; sourceIndex++) { @@ -225,18 +268,20 @@ public class DasSequenceFeatureFetcher private void setGuiNoDassourceActive() { - if (af!=null) + if (af != null) { af.setProgressBar("No DAS Sources Active", startTime); } - if (getFeatSettings()!=null) + if (getFeatSettings() != null) { fsettings.noDasSourceActive(); - } + } } /** - * Update our fsettings dialog reference if we didn't have one when we were first initialised. + * Update our fsettings dialog reference if we didn't have one when we were + * first initialised. + * * @return fsettings */ private FeatureSettings getFeatSettings() @@ -253,7 +298,7 @@ public class DasSequenceFeatureFetcher public void cancel() { - if (af!=null) + if (af != null) { af.setProgressBar("DAS Feature Fetching Cancelled", startTime); } @@ -289,8 +334,9 @@ public class DasSequenceFeatureFetcher private void setGuiFetchComplete() { - if (af != null) + if (!cancelled && af != null) { + // only update the progress bar if we've completed the fetch normally af.setProgressBar("DAS Feature Fetching Complete", startTime); } @@ -307,7 +353,7 @@ public class DasSequenceFeatureFetcher void featuresAdded(SequenceI seq) { - if (af==null) + if (af == null) { // no gui to update with features. return; @@ -335,11 +381,13 @@ public class DasSequenceFeatureFetcher DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs( seq.getDBRef(), new String[] { - // jalview.datamodel.DBRefSource.PDB, + // jalview.datamodel.DBRefSource.PDB, jalview.datamodel.DBRefSource.UNIPROT, - // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord sys sources + // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord + // sys sources }); - // TODO: minimal list of DAS queries to make by querying with untyped ID if distinct from any typed IDs + // TODO: minimal list of DAS queries to make by querying with untyped ID if + // distinct from any typed IDs boolean dasCoordSysFound = false; @@ -358,8 +406,9 @@ public class DasSequenceFeatureFetcher { debug("Launched fetcher for coordinate system " + cs[0].getName()); - // Will have to pass any mapping information to the fetcher - //- the start/end for the DBRefEntry may not be the same as the sequence's start/end + // Will have to pass any mapping information to the fetcher + // - the start/end for the DBRefEntry may not be the same as the + // sequence's start/end System.out.println(seq.getName() + " " + (seq.getDatasetSequence() == null) + " " @@ -381,13 +430,14 @@ public class DasSequenceFeatureFetcher if (seq.getName().indexOf("|") > -1) { id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1); - if (id.trim().length()<4) + if (id.trim().length() < 4) { - // hack - we regard a significant ID as being at least 4 non-whitespace characters + // hack - we regard a significant ID as being at least 4 + // non-whitespace characters id = seq.getName().substring(0, seq.getName().lastIndexOf("|")); - if (id.indexOf("|")>-1) + if (id.indexOf("|") > -1) { - id = id.substring(id.lastIndexOf("|")+1); + id = id.substring(id.lastIndexOf("|") + 1); } } } @@ -406,8 +456,10 @@ public class DasSequenceFeatureFetcher } /** - * fetch and add das features to a sequence using the given source URL and compatible DbRef id. - * new features are mapped using the DbRef mapping to the local coordinate system. + * fetch and add das features to a sequence using the given source URL and + * compatible DbRef id. new features are mapped using the DbRef mapping to the + * local coordinate system. + * * @param seq * @param SourceUrl * @param dbref @@ -416,8 +468,8 @@ public class DasSequenceFeatureFetcher final DasSource dasSource, final DBRefEntry dbref) { - ////////////// - /// fetch DAS features + // //////////// + // / fetch DAS features final Das1Source source = new Das1Source(); source.setUrl(dasSource.getUrl()); source.setNickname(dasSource.getNickname()); @@ -427,11 +479,10 @@ public class DasSequenceFeatureFetcher responseComplete(dasSource, seq); // reduce thread count anyhow return; } - debug("new Das Feature Fetcher for " + dbref.getSource() - + ":" + dbref.getAccessionId() + " querying " - + dasSource.getUrl()); + debug("new Das Feature Fetcher for " + dbref.getSource() + ":" + + dbref.getAccessionId() + " querying " + dasSource.getUrl()); FeatureThread fetcher = new FeatureThread(dbref.getAccessionId() - // + ":" + start + "," + end, + // + ":" + start + "," + end, , source); fetcher.addFeatureListener(new FeatureListener() @@ -451,59 +502,61 @@ public class DasSequenceFeatureFetcher Map[] features = e.getFeatures(); // add features to sequence - debug("das source " + ds.getUrl() + " returned " - + features.length + " features"); + debug("das source " + ds.getUrl() + " returned " + features.length + + " features"); if (features.length > 0) { for (int i = 0; i < features.length; i++) { - SequenceFeature f = newSequenceFeature(features[i], source - .getNickname()); - if (dbref.getMap() != null && f.getBegin() > 0 - && f.getEnd() > 0) + // standard DAS feature-> jalview sequence feature transformation + SequenceFeature f = newSequenceFeature(features[i], + source.getNickname()); + if (!parseSeqFeature(seq, f, features[i], source)) { - debug("mapping from " + f.getBegin() + " - " - + f.getEnd()); - SequenceFeature vf[] = null; - - try + if (dbref.getMap() != null && f.getBegin() > 0 + && f.getEnd() > 0) { - vf = dbref.getMap().locateFeature(f); - } catch (Exception ex) - { - Cache.log - .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to help@jalview.org."); - Cache.log.info("Mapping feature from " + f.getBegin() - + " to " + f.getEnd() + " in dbref " - + dbref.getAccessionId() + " in " - + dbref.getSource()); - Cache.log.info("using das Source " + ds.getUrl()); - Cache.log.info("Exception", ex); - } + debug("mapping from " + f.getBegin() + " - " + f.getEnd()); + SequenceFeature vf[] = null; - if (vf != null) - { - for (int v = 0; v < vf.length; v++) + try { - debug("mapping to " + v + ": " - + vf[v].getBegin() + " - " + vf[v].getEnd()); - seq.addSequenceFeature(vf[v]); + vf = dbref.getMap().locateFeature(f); + } catch (Exception ex) + { + Cache.log + .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org."); + Cache.log.info("Mapping feature from " + f.getBegin() + + " to " + f.getEnd() + " in dbref " + + dbref.getAccessionId() + " in " + + dbref.getSource()); + Cache.log.info("using das Source " + ds.getUrl()); + Cache.log.info("Exception", ex); + } + + if (vf != null) + { + for (int v = 0; v < vf.length; v++) + { + debug("mapping to " + v + ": " + vf[v].getBegin() + + " - " + vf[v].getEnd()); + seq.addSequenceFeature(vf[v]); + } } } - } - else - { - seq.addSequenceFeature(f); + else + { + seq.addSequenceFeature(f); + } } } - featuresAdded(seq); } else { - // System.out.println("No features found for " + seq.getName() - // + " from: " + e.getDasSource().getNickname()); + // System.out.println("No features found for " + seq.getName() + // + " from: " + e.getDasSource().getNickname()); } responseComplete(dasSource, seq); @@ -518,13 +571,13 @@ public class DasSequenceFeatureFetcher protected void createFeatureFetcher(final SequenceI seq, final DasSource dasSource, String id) { - ////////////// - /// fetch DAS features + // //////////// + // / fetch DAS features final Das1Source source = new Das1Source(); source.setUrl(dasSource.getUrl()); source.setNickname(dasSource.getNickname()); - - if (id!=null) + + if (id != null) { id = id.trim(); } @@ -533,7 +586,7 @@ public class DasSequenceFeatureFetcher debug("new Das Feature Fetcher for " + id + " querying " + dasSource.getUrl()); FeatureThread fetcher = new FeatureThread(id - // + ":" + start + "," + end, + // + ":" + start + "," + end, , source); fetcher.addFeatureListener(new FeatureListener() @@ -560,18 +613,22 @@ public class DasSequenceFeatureFetcher { for (int i = 0; i < features.length; i++) { - SequenceFeature f = newSequenceFeature(features[i], source - .getNickname()); - - seq.addSequenceFeature(f); + // standard DAS feature-> jalview sequence feature transformation + SequenceFeature f = newSequenceFeature(features[i], + source.getNickname()); + if (!parseSeqFeature(seq, f, features[i], source)) + { + // just add as a simple sequence feature + seq.addSequenceFeature(f); + } } featuresAdded(seq); } else { - // System.out.println("No features found for " + seq.getName() - // + " from: " + e.getDasSource().getNickname()); + // System.out.println("No features found for " + seq.getName() + // + " from: " + e.getDasSource().getNickname()); } responseComplete(dasSource, seq); @@ -581,16 +638,122 @@ public class DasSequenceFeatureFetcher ); fetcher.start(); - } else { + } + else + { // invalid fetch - indicate it is finished. - debug("Skipping empty ID for querying " - + dasSource.getUrl()); + debug("Skipping empty ID for querying " + dasSource.getUrl()); responseComplete(dasSource, seq); } } /** + * examine the given sequence feature to determine if it should actually be + * turned into sequence annotation or database cross references rather than a + * simple sequence feature. + * + * @param seq + * the sequence to annotate + * @param f + * the jalview sequence feature generated from the DAS feature + * @param map + * the sequence feature attributes + * @param source + * the source that emitted the feature + * @return true if feature was consumed as another kind of annotation. + */ + protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f, + Map map, Das1Source source) + { + SequenceI mseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } + if (f.getType() != null) + { + String type = f.getType(); + if (type.equalsIgnoreCase("protein_name")) + { + // parse name onto the alignment sequence or the dataset sequence. + if (seq.getDescription() == null + || seq.getDescription().trim().length() == 0) + { + // could look at the note series to pick out the first long name, for + // the moment just use the whole description string + seq.setDescription(f.getDescription()); + } + if (mseq.getDescription() == null + || mseq.getDescription().trim().length() == 0) + { + // could look at the note series to pick out the first long name, for + // the moment just use the whole description string + mseq.setDescription(f.getDescription()); + } + return true; + } + // check if source has biosapiens or other sequence ontology label + if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF")) + { + // try to parse the accession out + + DBRefEntry dbr = new DBRefEntry(); + dbr.setVersion(source.getNickname()); + StringTokenizer st = new StringTokenizer(f.getDescription(), ":"); + if (st.hasMoreTokens()) + { + dbr.setSource(st.nextToken()); + } + if (st.hasMoreTokens()) + { + dbr.setAccessionId(st.nextToken()); + } + seq.addDBRef(dbr); + + if (f.links != null && f.links.size() > 0) + { + // feature is also appended to enable links to be seen. + // TODO: consider extending dbrefs to have their own links ? + // TODO: new feature: extract dbref links from DAS servers and add the + // URL pattern to the list of DB name associated links in the user's + // preferences ? + // for the moment - just fix up the existing feature so it displays + // correctly. + // f.setType(dbr.getSource()); + // f.setDescription(); + f.setValue("linkonly", Boolean.TRUE); + // f.setDescription(""); + Vector newlinks = new Vector(); + Enumeration it = f.links.elements(); + while (it.hasMoreElements()) + { + String elm; + UrlLink urllink = new UrlLink(elm = (String) it.nextElement()); + if (urllink.isValid()) + { + urllink.setLabel(f.getDescription()); + newlinks.addElement(urllink.toString()); + } + else + { + // couldn't parse the link properly. Keep it anyway - just in + // case. + debug("couldn't parse link string - " + elm); + newlinks.addElement(elm); + } + } + f.links = newlinks; + seq.addSequenceFeature(f); + } + return true; + } + } + return false; + } + + /** * creates a jalview sequence feature from a das feature document + * * @param dasfeature * @return sequence feature object created using dasfeature information */ @@ -603,14 +766,10 @@ public class DasSequenceFeatureFetcher try { /** - * Different qNames for a DAS Feature - are string keys to the HashMaps in features - * "METHOD") || - qName.equals("TYPE") || - qName.equals("START") || - qName.equals("END") || - qName.equals("NOTE") || - qName.equals("LINK") || - qName.equals("SCORE") + * Different qNames for a DAS Feature - are string keys to the HashMaps in + * features "METHOD") || qName.equals("TYPE") || qName.equals("START") || + * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") || + * qName.equals("SCORE") */ String desc = new String(); if (dasfeature.containsKey("NOTE")) @@ -635,18 +794,32 @@ public class DasSequenceFeatureFetcher } try { - score = Integer.parseInt(dasfeature.get("SCORE").toString()); + Object scr = dasfeature.get("SCORE"); + if (scr != null) + { + score = (float) Double.parseDouble(scr.toString()); + + } } catch (Exception ex) { } - SequenceFeature f = new SequenceFeature((String) dasfeature - .get("TYPE"), desc, start, end, score, nickname); + SequenceFeature f = new SequenceFeature( + (String) dasfeature.get("TYPE"), desc, start, end, score, + nickname); if (dasfeature.containsKey("LINK")) { - f.addLink(f.getType() + " " + f.begin + "_" + f.end + "|" - + dasfeature.get("LINK")); + // Do not put feature extent in link text for non-positional features + if (f.begin == 0 && f.end == 0) + { + f.addLink(f.getType() + "|" + dasfeature.get("LINK")); + } + else + { + f.addLink(f.getType() + " " + f.begin + "_" + f.end + "|" + + dasfeature.get("LINK")); + } } return f; @@ -659,9 +832,12 @@ public class DasSequenceFeatureFetcher return null; } } + /** - * query the default DAS Source Registry for sources. - * Uses value of jalview property DAS_REGISTRY_URL and the DasSourceBrowser.DEFAULT_REGISTRY if that doesn't exist. + * query the default DAS Source Registry for sources. Uses value of jalview + * property DAS_REGISTRY_URL and the DasSourceBrowser.DEFAULT_REGISTRY if that + * doesn't exist. + * * @return list of sources */ public static DasSource[] getDASSources() @@ -671,10 +847,12 @@ public class DasSequenceFeatureFetcher DasSourceBrowser.DEFAULT_REGISTRY); return getDASSources(registryURL); } + /** * query the given URL for DasSources. + * * @param registryURL - * return sources from registryURL + * return sources from registryURL */ public static DasSource[] getDASSources(String registryURL) { @@ -710,6 +888,8 @@ public class DasSequenceFeatureFetcher .size()]); } catch (Exception ex) { + System.err.println("Failed to contact DAS1 registry at " + + registryURL); ex.printStackTrace(); return null; }