X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2FDasSequenceFeatureFetcher.java;h=345bdd7e37b1939e2497e909ce8a74209e482dea;hb=17e77c3f2949a0729322b4a8d907f3f34b6a9914;hp=30fe7253f1ee69f5490ce771217d0d0ac8d07b15;hpb=1d4bc65204bdb63740d3692f13633c463d5c08fd;p=jalview.git diff --git a/src/jalview/ws/DasSequenceFeatureFetcher.java b/src/jalview/ws/DasSequenceFeatureFetcher.java index 30fe725..345bdd7 100644 --- a/src/jalview/ws/DasSequenceFeatureFetcher.java +++ b/src/jalview/ws/DasSequenceFeatureFetcher.java @@ -1,882 +1,926 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . - */ -package jalview.ws; - -import java.net.*; -import java.util.*; - -import javax.swing.*; - -import org.biodas.jdas.client.SourcesClient; -import org.biodas.jdas.schema.sources.SOURCE; -import org.biodas.jdas.schema.sources.SOURCES; -import org.biojava.dasobert.das.*; -import org.biojava.dasobert.das2.*; -import org.biojava.dasobert.das2.io.*; -import org.biojava.dasobert.dasregistry.*; -import org.biojava.dasobert.eventmodel.*; -import jalview.bin.Cache; -import jalview.datamodel.*; -import jalview.gui.*; -import jalview.util.UrlLink; - -/** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ - */ -public class DasSequenceFeatureFetcher -{ - SequenceI[] sequences; - - AlignFrame af; - - FeatureSettings fsettings; - - StringBuffer sbuffer = new StringBuffer(); - - Vector selectedSources; - - boolean cancelled = false; - - private void debug(String mesg) - { - debug(mesg, null); - } - - private void debug(String mesg, Exception e) - { - if (Cache.log != null) - { - Cache.log.debug(mesg, e); - } - else - { - System.err.println(mesg); - if (e != null) - { - e.printStackTrace(); - } - } - } - - long startTime; - - /** - * Creates a new SequenceFeatureFetcher object. Uses default - * - * @param align - * DOCUMENT ME! - * @param ap - * DOCUMENT ME! - */ - public DasSequenceFeatureFetcher(SequenceI[] sequences, - FeatureSettings fsettings, Vector selectedSources) - { - this(sequences, fsettings, selectedSources, true, true); - } - - public DasSequenceFeatureFetcher(SequenceI[] oursequences, - FeatureSettings fsettings, Vector ourselectedSources, - boolean checkDbrefs, boolean promptFetchDbrefs) - { - this.selectedSources = new Vector(); - Enumeration sources = ourselectedSources.elements(); - // filter both sequences and sources to eliminate duplicates - while (sources.hasMoreElements()) - { - Object src = sources.nextElement(); - if (!selectedSources.contains(src)) - { - selectedSources.addElement(src); - } - ; - } - Vector sqs = new Vector(); - for (int i = 0; i < oursequences.length; i++) - { - if (!sqs.contains(oursequences[i])) - { - sqs.addElement(oursequences[i]); - } - } - sequences = new SequenceI[sqs.size()]; - for (int i = 0; i < sequences.length; i++) - { - sequences[i] = (SequenceI) sqs.elementAt(i); - } - if (fsettings != null) - { - this.fsettings = fsettings; - this.af = fsettings.af; - af.setShowSeqFeatures(true); - } - int uniprotCount = 0; - for (int i = 0; i < selectedSources.size(); i++) - { - DasSource source = (DasSource) selectedSources.elementAt(i); - DasCoordinateSystem[] coords = source.getCoordinateSystem(); - for (int c = 0; c < coords.length; c++) - { - // TODO: match UniProt coord system canonically (?) - does - // UniProt==uniprot==UNIPROT ? - if (coords[c].getName().indexOf("UniProt") > -1) - { - uniprotCount++; - break; - } - } - } - - int refCount = 0; - for (int i = 0; i < sequences.length; i++) - { - DBRefEntry[] dbref = sequences[i].getDBRef(); - if (dbref != null) - { - for (int j = 0; j < dbref.length; j++) - { - if (dbref[j].getSource().equals( - jalview.datamodel.DBRefSource.UNIPROT)) - { - refCount++; - break; - } - } - } - } - - if (checkDbrefs && refCount < sequences.length && uniprotCount > 0) - { - - int reply = JOptionPane.YES_OPTION; - if (promptFetchDbrefs) - { - reply = JOptionPane - .showInternalConfirmDialog( - Desktop.desktop, - "Do you want Jalview to find\n" - + "Uniprot Accession ids for given sequence names?", - "Find Uniprot Accession Ids", - JOptionPane.YES_NO_OPTION, - JOptionPane.QUESTION_MESSAGE); - } - - if (reply == JOptionPane.YES_OPTION) - { - Thread thread = new Thread(new FetchDBRefs()); - thread.start(); - } - else - { - startFetching(); - } - } - else - { - startFetching(); - } - - } - - class FetchDBRefs implements Runnable - { - public void run() - { - new DBRefFetcher(sequences, af).fetchDBRefs(true); - startFetching(); - } - } - - /** - * Spawns a number of dasobert Fetcher threads to add features to sequences in - * the dataset - */ - void startFetching() - { - cancelled = false; - startTime = System.currentTimeMillis(); - if (af != null) - { - af.setProgressBar("Fetching DAS Sequence Features", startTime); - } - - if (selectedSources == null || selectedSources.size() == 0) - { - try - { - DasSource[] sources = new jalview.gui.DasSourceBrowser() - .getDASSource(); - - String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE", - "uniprot"); - StringTokenizer st = new StringTokenizer(active, "\t"); - selectedSources = new Vector(); - String token; - while (st.hasMoreTokens()) - { - token = st.nextToken(); - for (int i = 0; i < sources.length; i++) - { - if (sources[i].getNickname().equals(token)) - { - selectedSources.addElement(sources[i]); - break; - } - } - } - } catch (Exception ex) - { - debug("Exception whilst setting default feature sources from registry and local preferences.", - ex); - } - } - - if (selectedSources == null || selectedSources.size() == 0) - { - System.out.println("No DAS Sources active"); - cancelled = true; - setGuiNoDassourceActive(); - return; - } - - sourcesRemaining = selectedSources.size(); - // Now sending requests one at a time to each server - for (int sourceIndex = 0; sourceIndex < selectedSources.size() - && !cancelled; sourceIndex++) - { - DasSource dasSource = (DasSource) selectedSources - .elementAt(sourceIndex); - - nextSequence(dasSource, sequences[0]); - } - } - - private void setGuiNoDassourceActive() - { - - if (af != null) - { - af.setProgressBar("No DAS Sources Active", startTime); - } - if (getFeatSettings() != null) - { - fsettings.noDasSourceActive(); - } - } - - /** - * Update our fsettings dialog reference if we didn't have one when we were - * first initialised. - * - * @return fsettings - */ - private FeatureSettings getFeatSettings() - { - if (fsettings == null) - { - if (af != null) - { - fsettings = af.featureSettings; - } - } - return fsettings; - } - - public void cancel() - { - if (af != null) - { - af.setProgressBar("DAS Feature Fetching Cancelled", startTime); - } - cancelled = true; - } - - int sourcesRemaining = 0; - - void responseComplete(DasSource dasSource, SequenceI seq) - { - if (seq != null) - { - for (int seqIndex = 0; seqIndex < sequences.length - 1 && !cancelled; seqIndex++) - { - if (sequences[seqIndex] == seq) - { - nextSequence(dasSource, sequences[++seqIndex]); - return; - } - } - } - - sourcesRemaining--; - - if (sourcesRemaining == 0) - { - System.err.println("Fetching Complete."); - setGuiFetchComplete(); - } - - } - - private void setGuiFetchComplete() - { - - if (!cancelled && af != null) - { - // only update the progress bar if we've completed the fetch normally - af.setProgressBar("DAS Feature Fetching Complete", startTime); - } - - if (af != null && af.featureSettings != null) - { - af.featureSettings.setTableData(); - } - - if (getFeatSettings() != null) - { - fsettings.complete(); - } - } - - void featuresAdded(SequenceI seq) - { - if (af == null) - { - // no gui to update with features. - return; - } - af.getFeatureRenderer().featuresAdded(); - - int start = af.getViewport().getStartSeq(); - int end = af.getViewport().getEndSeq(); - int index; - for (index = start; index < end; index++) - { - if (seq == af.getViewport().getAlignment().getSequenceAt(index) - .getDatasetSequence()) - { - af.alignPanel.paintAlignment(true); - break; - } - } - } - - void nextSequence(DasSource dasSource, SequenceI seq) - { - if (cancelled) - return; - DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs( - seq.getDBRef(), new String[] - { - // jalview.datamodel.DBRefSource.PDB, - jalview.datamodel.DBRefSource.UNIPROT, - // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord - // sys sources - }); - // TODO: minimal list of DAS queries to make by querying with untyped ID if - // distinct from any typed IDs - - boolean dasCoordSysFound = false; - - if (uprefs != null) - { - // do any of these ids match the source's coordinate system ? - for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++) - { - DasCoordinateSystem cs[] = dasSource.getCoordinateSystem(); - - for (int csIndex = 0; csIndex < cs.length && !dasCoordSysFound; csIndex++) - { - if (cs.length > 0 - && jalview.util.DBRefUtils.isDasCoordinateSystem( - cs[csIndex].getName(), uprefs[j])) - { - debug("Launched fetcher for coordinate system " - + cs[0].getName()); - // Will have to pass any mapping information to the fetcher - // - the start/end for the DBRefEntry may not be the same as the - // sequence's start/end - - System.out.println(seq.getName() + " " - + (seq.getDatasetSequence() == null) + " " - + dasSource.getUrl()); - - dasCoordSysFound = true; // break's out of the loop - createFeatureFetcher(seq, dasSource, uprefs[j]); - } - else - System.out.println("IGNORE " + cs[csIndex].getName()); - } - } - } - - if (!dasCoordSysFound) - { - String id = null; - // try and use the name as the sequence id - if (seq.getName().indexOf("|") > -1) - { - id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1); - if (id.trim().length() < 4) - { - // hack - we regard a significant ID as being at least 4 - // non-whitespace characters - id = seq.getName().substring(0, seq.getName().lastIndexOf("|")); - if (id.indexOf("|") > -1) - { - id = id.substring(id.lastIndexOf("|") + 1); - } - } - } - else - { - id = seq.getName(); - } - if (id != null) - { - // Should try to call a general feature fetcher that - // queries many sources with name to discover applicable ID references - createFeatureFetcher(seq, dasSource, id); - } - } - - } - - /** - * fetch and add das features to a sequence using the given source URL and - * compatible DbRef id. new features are mapped using the DbRef mapping to the - * local coordinate system. - * - * @param seq - * @param SourceUrl - * @param dbref - */ - protected void createFeatureFetcher(final SequenceI seq, - final DasSource dasSource, final DBRefEntry dbref) - { - - // //////////// - // / fetch DAS features - final Das1Source source = new Das1Source(); - source.setUrl(dasSource.getUrl()); - source.setNickname(dasSource.getNickname()); - if (dbref == null || dbref.getAccessionId() == null - || dbref.getAccessionId().length() < 1) - { - responseComplete(dasSource, seq); // reduce thread count anyhow - return; - } - debug("new Das Feature Fetcher for " + dbref.getSource() + ":" - + dbref.getAccessionId() + " querying " + dasSource.getUrl()); - FeatureThread fetcher = new FeatureThread(dbref.getAccessionId() - // + ":" + start + "," + end, - , source); - - fetcher.addFeatureListener(new FeatureListener() - { - public void comeBackLater(FeatureEvent e) - { - responseComplete(dasSource, seq); - debug("das source " + e.getSource().getNickname() - + " asked us to come back in " + e.getComeBackLater() - + " secs."); - } - - public void newFeatures(FeatureEvent e) - { - - Das1Source ds = e.getSource(); - - Map[] features = e.getFeatures(); - // add features to sequence - debug("das source " + ds.getUrl() + " returned " + features.length - + " features"); - - if (features.length > 0) - { - for (int i = 0; i < features.length; i++) - { - // standard DAS feature-> jalview sequence feature transformation - SequenceFeature f = newSequenceFeature(features[i], - source.getNickname()); - if (!parseSeqFeature(seq, f, features[i], source)) - { - if (dbref.getMap() != null && f.getBegin() > 0 - && f.getEnd() > 0) - { - debug("mapping from " + f.getBegin() + " - " + f.getEnd()); - SequenceFeature vf[] = null; - - try - { - vf = dbref.getMap().locateFeature(f); - } catch (Exception ex) - { - Cache.log - .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org."); - Cache.log.info("Mapping feature from " + f.getBegin() - + " to " + f.getEnd() + " in dbref " - + dbref.getAccessionId() + " in " - + dbref.getSource()); - Cache.log.info("using das Source " + ds.getUrl()); - Cache.log.info("Exception", ex); - } - - if (vf != null) - { - for (int v = 0; v < vf.length; v++) - { - debug("mapping to " + v + ": " + vf[v].getBegin() - + " - " + vf[v].getEnd()); - seq.addSequenceFeature(vf[v]); - } - } - } - else - { - seq.addSequenceFeature(f); - } - } - } - featuresAdded(seq); - } - else - { - // System.out.println("No features found for " + seq.getName() - // + " from: " + e.getDasSource().getNickname()); - } - responseComplete(dasSource, seq); - - } - } - - ); - - fetcher.start(); - } - - protected void createFeatureFetcher(final SequenceI seq, - final DasSource dasSource, String id) - { - // //////////// - // / fetch DAS features - final Das1Source source = new Das1Source(); - source.setUrl(dasSource.getUrl()); - source.setNickname(dasSource.getNickname()); - - if (id != null) - { - id = id.trim(); - } - if (id != null && id.length() > 0) - { - debug("new Das Feature Fetcher for " + id + " querying " - + dasSource.getUrl()); - FeatureThread fetcher = new FeatureThread(id - // + ":" + start + "," + end, - , source); - - fetcher.addFeatureListener(new FeatureListener() - { - public void comeBackLater(FeatureEvent e) - { - responseComplete(dasSource, seq); - debug("das source " + e.getSource().getNickname() - + " asked us to come back in " + e.getComeBackLater() - + " secs."); - } - - public void newFeatures(FeatureEvent e) - { - - Das1Source ds = e.getSource(); - - Map[] features = e.getFeatures(); - // add features to sequence - debug("das source " + ds.getUrl() + " returned " - + features.length + " features"); - - if (features.length > 0) - { - for (int i = 0; i < features.length; i++) - { - // standard DAS feature-> jalview sequence feature transformation - SequenceFeature f = newSequenceFeature(features[i], - source.getNickname()); - if (!parseSeqFeature(seq, f, features[i], source)) - { - // just add as a simple sequence feature - seq.addSequenceFeature(f); - } - } - - featuresAdded(seq); - } - else - { - // System.out.println("No features found for " + seq.getName() - // + " from: " + e.getDasSource().getNickname()); - } - responseComplete(dasSource, seq); - - } - } - - ); - - fetcher.start(); - } - else - { - // invalid fetch - indicate it is finished. - debug("Skipping empty ID for querying " + dasSource.getUrl()); - responseComplete(dasSource, seq); - } - } - - /** - * examine the given sequence feature to determine if it should actually be - * turned into sequence annotation or database cross references rather than a - * simple sequence feature. - * - * @param seq - * the sequence to annotate - * @param f - * the jalview sequence feature generated from the DAS feature - * @param map - * the sequence feature attributes - * @param source - * the source that emitted the feature - * @return true if feature was consumed as another kind of annotation. - */ - protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f, - Map map, Das1Source source) - { - SequenceI mseq = seq; - while (seq.getDatasetSequence() != null) - { - seq = seq.getDatasetSequence(); - } - if (f.getType() != null) - { - String type = f.getType(); - if (type.equalsIgnoreCase("protein_name")) - { - // parse name onto the alignment sequence or the dataset sequence. - if (seq.getDescription() == null - || seq.getDescription().trim().length() == 0) - { - // could look at the note series to pick out the first long name, for - // the moment just use the whole description string - seq.setDescription(f.getDescription()); - } - if (mseq.getDescription() == null - || mseq.getDescription().trim().length() == 0) - { - // could look at the note series to pick out the first long name, for - // the moment just use the whole description string - mseq.setDescription(f.getDescription()); - } - return true; - } - // check if source has biosapiens or other sequence ontology label - if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF")) - { - // try to parse the accession out - - DBRefEntry dbr = new DBRefEntry(); - dbr.setVersion(source.getNickname()); - StringTokenizer st = new StringTokenizer(f.getDescription(), ":"); - if (st.hasMoreTokens()) - { - dbr.setSource(st.nextToken()); - } - if (st.hasMoreTokens()) - { - dbr.setAccessionId(st.nextToken()); - } - seq.addDBRef(dbr); - - if (f.links != null && f.links.size() > 0) - { - // feature is also appended to enable links to be seen. - // TODO: consider extending dbrefs to have their own links ? - // TODO: new feature: extract dbref links from DAS servers and add the - // URL pattern to the list of DB name associated links in the user's - // preferences ? - // for the moment - just fix up the existing feature so it displays - // correctly. - // f.setType(dbr.getSource()); - // f.setDescription(); - f.setValue("linkonly", Boolean.TRUE); - // f.setDescription(""); - Vector newlinks = new Vector(); - Enumeration it = f.links.elements(); - while (it.hasMoreElements()) - { - String elm; - UrlLink urllink = new UrlLink(elm = (String) it.nextElement()); - if (urllink.isValid()) - { - urllink.setLabel(f.getDescription()); - newlinks.addElement(urllink.toString()); - } - else - { - // couldn't parse the link properly. Keep it anyway - just in - // case. - debug("couldn't parse link string - " + elm); - newlinks.addElement(elm); - } - } - f.links = newlinks; - seq.addSequenceFeature(f); - } - return true; - } - } - return false; - } - - /** - * creates a jalview sequence feature from a das feature document - * - * @param dasfeature - * @return sequence feature object created using dasfeature information - */ - SequenceFeature newSequenceFeature(Map dasfeature, String nickname) - { - if (dasfeature == null) - { - return null; - } - try - { - /** - * Different qNames for a DAS Feature - are string keys to the HashMaps in - * features "METHOD") || qName.equals("TYPE") || qName.equals("START") || - * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") || - * qName.equals("SCORE") - */ - String desc = new String(); - if (dasfeature.containsKey("NOTE")) - { - desc += (String) dasfeature.get("NOTE"); - } - - int start = 0, end = 0; - float score = 0f; - - try - { - start = Integer.parseInt(dasfeature.get("START").toString()); - } catch (Exception ex) - { - } - try - { - end = Integer.parseInt(dasfeature.get("END").toString()); - } catch (Exception ex) - { - } - try - { - Object scr = dasfeature.get("SCORE"); - if (scr != null) - { - score = (float) Double.parseDouble(scr.toString()); - - } - } catch (Exception ex) - { - } - - SequenceFeature f = new SequenceFeature( - (String) dasfeature.get("TYPE"), desc, start, end, score, - nickname); - - if (dasfeature.containsKey("LINK")) - { - // Do not put feature extent in link text for non-positional features - if (f.begin == 0 && f.end == 0) - { - f.addLink(f.getType() + "|" + dasfeature.get("LINK")); - } - else - { - f.addLink(f.getType() + " " + f.begin + "_" + f.end + "|" - + dasfeature.get("LINK")); - } - } - - return f; - } catch (Exception e) - { - System.out.println("ERRR " + e); - e.printStackTrace(); - System.out.println("############"); - debug("Failed to parse " + dasfeature.toString(), e); - return null; - } - } - - /** - * query the default DAS Source Registry for sources. Uses value of jalview - * property DAS_REGISTRY_URL and the DasSourceBrowser.DEFAULT_REGISTRY if that - * doesn't exist. - * - * @return list of sources - */ - public static SOURCE[] getDASSources() - { - - String registryURL = jalview.bin.Cache.getDefault("DAS_REGISTRY_URL", - DasSourceBrowser.DEFAULT_REGISTRY); - return getDASSources(registryURL); - } - - /** - * query the given URL for DasSources. - * - * @param registryURL - * return sources from registryURL - */ - public static SOURCE[] getDASSources(String registryURL) - { - - try - { - URL url = new URL(registryURL); - org.biodas.jdas.client.SourcesClientInterface client = new SourcesClient(); - - SOURCES sources = client.fetchData(registryURL); - - List dassources = sources.getSOURCE(); - - return dassources.toArray(new SOURCE[dassources.size()]); - } catch (Exception ex) - { - System.err.println("Failed to contact DAS1 registry at " - + registryURL); - ex.printStackTrace(); - return null; - } - } - -} +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9) + * Copyright (C) 2015 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.ws; + +import jalview.bin.Cache; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignFrame; +import jalview.gui.Desktop; +import jalview.gui.FeatureSettings; +import jalview.util.MessageManager; +import jalview.util.UrlLink; +import jalview.ws.dbsources.das.api.DasSourceRegistryI; +import jalview.ws.dbsources.das.api.jalviewSourceI; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.Vector; + +import javax.swing.JOptionPane; + +import org.biodas.jdas.client.FeaturesClient; +import org.biodas.jdas.client.adapters.features.DasGFFAdapter; +import org.biodas.jdas.client.adapters.features.DasGFFAdapter.GFFAdapter; +import org.biodas.jdas.client.threads.FeaturesClientMultipleSources; +import org.biodas.jdas.schema.features.ERRORSEGMENT; +import org.biodas.jdas.schema.features.FEATURE; +import org.biodas.jdas.schema.features.LINK; +import org.biodas.jdas.schema.features.SEGMENT; +import org.biodas.jdas.schema.features.TYPE; +import org.biodas.jdas.schema.features.UNKNOWNFEATURE; +import org.biodas.jdas.schema.features.UNKNOWNSEGMENT; +import org.biodas.jdas.schema.sources.COORDINATES; + +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class DasSequenceFeatureFetcher +{ + SequenceI[] sequences; + + AlignFrame af; + + FeatureSettings fsettings; + + StringBuffer sbuffer = new StringBuffer(); + + List selectedSources; + + boolean cancelled = false; + + private void debug(String mesg) + { + debug(mesg, null); + } + + private void debug(String mesg, Exception e) + { + if (Cache.log != null) + { + Cache.log.debug(mesg, e); + } + else + { + System.err.println(mesg); + if (e != null) + { + e.printStackTrace(); + } + } + } + + long startTime; + + private DasSourceRegistryI sourceRegistry; + + private boolean useJDASMultiThread = true; + + /** + * Creates a new SequenceFeatureFetcher object. Uses default + * + * @param align + * DOCUMENT ME! + * @param ap + * DOCUMENT ME! + */ + public DasSequenceFeatureFetcher(SequenceI[] sequences, + FeatureSettings fsettings, Vector selectedSources) + { + this(sequences, fsettings, selectedSources, true, true, true); + } + + public DasSequenceFeatureFetcher(SequenceI[] oursequences, + FeatureSettings fsettings, List selectedSources2, + boolean checkDbrefs, boolean promptFetchDbrefs) + { + this(oursequences, fsettings, selectedSources2, checkDbrefs, + promptFetchDbrefs, true); + } + + public DasSequenceFeatureFetcher(SequenceI[] oursequences, + FeatureSettings fsettings, List selectedSources2, + boolean checkDbrefs, boolean promptFetchDbrefs, + boolean useJDasMultiThread) + { + this.useJDASMultiThread = useJDasMultiThread; + this.selectedSources = new ArrayList(); + // filter both sequences and sources to eliminate duplicates + for (jalviewSourceI src : selectedSources2) + { + if (!selectedSources.contains(src)) + { + selectedSources.add(src); + } + ; + } + Vector sqs = new Vector(); + for (int i = 0; i < oursequences.length; i++) + { + if (!sqs.contains(oursequences[i])) + { + sqs.addElement(oursequences[i]); + } + } + sequences = new SequenceI[sqs.size()]; + for (int i = 0; i < sequences.length; i++) + { + sequences[i] = (SequenceI) sqs.elementAt(i); + } + if (fsettings != null) + { + this.fsettings = fsettings; + this.af = fsettings.af; + af.setShowSeqFeatures(true); + } + int uniprotCount = 0; + for (jalviewSourceI source : selectedSources) + { + for (COORDINATES coords : source.getVersion().getCOORDINATES()) + { + // TODO: match UniProt coord system canonically (?) - does + // UniProt==uniprot==UNIPROT ? + if (coords.getAuthority().toLowerCase().equals("uniprot")) + { + uniprotCount++; + break; + } + } + } + + int refCount = 0; + for (int i = 0; i < sequences.length; i++) + { + DBRefEntry[] dbref = sequences[i].getDBRef(); + if (dbref != null) + { + for (int j = 0; j < dbref.length; j++) + { + if (dbref[j].getSource().equals( + jalview.datamodel.DBRefSource.UNIPROT)) + { + refCount++; + break; + } + } + } + } + + if (checkDbrefs && refCount < sequences.length && uniprotCount > 0) + { + + int reply = JOptionPane.YES_OPTION; + if (promptFetchDbrefs) + { + reply = JOptionPane + .showInternalConfirmDialog( + Desktop.desktop, + MessageManager + .getString("info.you_want_jalview_to_find_uniprot_accessions"), + MessageManager + .getString("label.find_uniprot_accession_ids"), + JOptionPane.YES_NO_OPTION, + JOptionPane.QUESTION_MESSAGE); + } + + if (reply == JOptionPane.YES_OPTION) + { + Thread thread = new Thread(new FetchDBRefs()); + thread.start(); + } + else + { + _startFetching(); + } + } + else + { + _startFetching(); + } + + } + + private void _startFetching() + { + running = true; + new Thread(new FetchSeqFeatures()).start(); + } + + class FetchSeqFeatures implements Runnable + { + public void run() + { + startFetching(); + setGuiFetchComplete(); + } + } + + class FetchDBRefs implements Runnable + { + public void run() + { + running = true; + new DBRefFetcher(sequences, af).fetchDBRefs(true); + startFetching(); + setGuiFetchComplete(); + } + } + + /** + * Spawns Fetcher threads to add features to sequences in the dataset + */ + void startFetching() + { + running = true; + cancelled = false; + startTime = System.currentTimeMillis(); + if (af != null) + { + af.setProgressBar(MessageManager + .getString("status.fetching_das_sequence_features"), + startTime); + } + if (sourceRegistry == null) + { + sourceRegistry = Cache.getDasSourceRegistry(); + } + if (selectedSources == null || selectedSources.size() == 0) + { + try + { + jalviewSourceI[] sources = sourceRegistry.getSources().toArray( + new jalviewSourceI[0]); + String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE", + "uniprot"); + StringTokenizer st = new StringTokenizer(active, "\t"); + selectedSources = new Vector(); + String token; + while (st.hasMoreTokens()) + { + token = st.nextToken(); + for (int i = 0; i < sources.length; i++) + { + if (sources[i].getTitle().equals(token)) + { + selectedSources.add(sources[i]); + break; + } + } + } + } catch (Exception ex) + { + debug("Exception whilst setting default feature sources from registry and local preferences.", + ex); + } + } + + if (selectedSources == null || selectedSources.size() == 0) + { + System.out.println("No DAS Sources active"); + cancelled = true; + setGuiNoDassourceActive(); + return; + } + + sourcesRemaining = selectedSources.size(); + FeaturesClientMultipleSources fc = new FeaturesClientMultipleSources(); + fc.setConnProps(sourceRegistry.getSessionHandler()); + // Now sending requests one at a time to each server + ArrayList srcobj = new ArrayList(); + ArrayList src = new ArrayList(); + List> ids = new ArrayList>(); + List> idobj = new ArrayList>(); + List> sqset = new ArrayList>(); + for (jalviewSourceI _sr : selectedSources) + { + + Map slist = new HashMap(); + List idob = new ArrayList(); + List qset = new ArrayList(); + + for (SequenceI seq : sequences) + { + Object[] idset = nextSequence(_sr, seq); + if (idset != null) + { + List _idob = (List) idset[0]; + List _qset = (List) idset[1]; + if (_idob.size() > 0) + { + // add sequence's ref for each id derived from it + // (space inefficient, but most unambiguous) + // could replace with hash with _qset values as keys. + Iterator dbobj = _idob.iterator(); + for (String q : _qset) + { + SequenceI osq = slist.get(q); + DBRefEntry dr = dbobj.next(); + if (osq != null && osq != seq) + { + // skip - non-canonical query + } + else + { + idob.add(dr); + qset.add(q); + slist.put(q, seq); + } + } + } + } + } + if (idob.size() > 0) + { + srcobj.add(_sr); + src.add(_sr.getSourceURL()); + ids.add(qset); + idobj.add(idob); + sqset.add(slist); + } + } + Map, Exception>> errors = new HashMap, Exception>>(); + Map, DasGFFAdapter>> results = new HashMap, DasGFFAdapter>>(); + if (!useJDASMultiThread) + { + Iterator sources = src.iterator(); + // iterate over each query for each source and do each one individually + for (List idl : ids) + { + String source = sources.next(); + FeaturesClient featuresc = new FeaturesClient(sourceRegistry + .getSessionHandler().getConnectionPropertyProviderFor( + source)); + for (String id : idl) + { + List qid = Arrays.asList(new String[] { id }); + try + { + DasGFFAdapter dga = featuresc.fetchData(source, qid); + Map, DasGFFAdapter> ers = results.get(source); + if (ers == null) + { + results.put(source, + ers = new HashMap, DasGFFAdapter>()); + } + ers.put(qid, dga); + } catch (Exception ex) + { + Map, Exception> ers = errors.get(source); + if (ers == null) + { + errors.put(source, + ers = new HashMap, Exception>()); + } + ers.put(qid, ex); + } + } + } + } + else + { + // pass them all at once + fc.fetchData(src, ids, false, results, errors); + fc.shutDown(); + while (!fc.isTerminated()) + { + try + { + Thread.sleep(200); + } catch (InterruptedException x) + { + + } + } + } + Iterator> idset = ids.iterator(); + Iterator> idobjset = idobj.iterator(); + Iterator> seqset = sqset.iterator(); + for (jalviewSourceI source : srcobj) + { + processResponse(seqset.next(), source, idset.next(), idobjset.next(), + results.get(source.getSourceURL()), + errors.get(source.getSourceURL())); + } + } + + private void processResponse(Map sequencemap, + jalviewSourceI jvsource, List ids, + List idobj, Map, DasGFFAdapter> results, + Map, Exception> errors) + { + Set sequences = new HashSet(); + String source = jvsource.getSourceURL(); + // process features + DasGFFAdapter result = (results == null) ? null : results.get(ids); + Exception error = (errors == null) ? null : errors.get(ids); + if (result == null) + { + debug("das source " + source + " could not be contacted. " + + (error == null ? "" : error.toString())); + } + else + { + + GFFAdapter gff = result.getGFF(); + List segments = gff.getSegments(); + List errorsegs = gff.getErrorSegments(); + List unkfeats = gff.getUnknownFeatures(); + List unksegs = gff.getUnknownSegments(); + debug("das source " + source + " returned " + gff.getTotal() + + " responses. " + (errorsegs != null ? errorsegs.size() : 0) + + " were incorrect segment queries, " + + (unkfeats != null ? unkfeats.size() : 0) + + " were unknown features " + + (unksegs != null ? unksegs.size() : 0) + + " were unknown segments and " + + (segments != null ? segments.size() : 0) + + " were segment responses."); + Iterator dbr = idobj.iterator(); + if (segments != null) + { + for (SEGMENT seg : segments) + { + String id = seg.getId(); + if (ids.indexOf(id) == -1) + { + id = id.toUpperCase(); + } + DBRefEntry dbref = idobj.get(ids.indexOf(id)); + SequenceI sequence = sequencemap.get(id); + boolean added = false; + sequences.add(sequence); + + for (FEATURE feat : seg.getFEATURE()) + { + // standard DAS feature-> jalview sequence feature transformation + SequenceFeature f = newSequenceFeature(feat, + jvsource.getTitle()); + if (!parseSeqFeature(sequence, f, feat, jvsource)) + { + if (dbref.getMap() != null && f.getBegin() > 0 + && f.getEnd() > 0) + { + debug("mapping from " + f.getBegin() + " - " + f.getEnd()); + SequenceFeature vf[] = null; + + try + { + vf = dbref.getMap().locateFeature(f); + } catch (Exception ex) + { + Cache.log + .warn("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org."); + Cache.log.warn("Mapping feature from " + f.getBegin() + + " to " + f.getEnd() + " in dbref " + + dbref.getAccessionId() + " in " + + dbref.getSource()); + Cache.log.warn("using das Source " + source); + Cache.log.warn("Exception", ex); + } + + if (vf != null) + { + for (int v = 0; v < vf.length; v++) + { + debug("mapping to " + v + ": " + vf[v].getBegin() + + " - " + vf[v].getEnd()); + sequence.addSequenceFeature(vf[v]); + } + } + } + else + { + sequence.addSequenceFeature(f); + } + } + } + } + featuresAdded(sequences); + } + else + { + // System.out.println("No features found for " + seq.getName() + // + " from: " + e.getDasSource().getNickname()); + } + } + } + + private void setGuiNoDassourceActive() + { + + if (af != null) + { + af.setProgressBar( + MessageManager.getString("status.no_das_sources_active"), + startTime); + } + if (getFeatSettings() != null) + { + fsettings.noDasSourceActive(); + } + } + + /** + * Update our fsettings dialog reference if we didn't have one when we were + * first initialised. + * + * @return fsettings + */ + private FeatureSettings getFeatSettings() + { + if (fsettings == null) + { + if (af != null) + { + fsettings = af.featureSettings; + } + } + return fsettings; + } + + public void cancel() + { + if (af != null) + { + af.setProgressBar(MessageManager + .getString("status.das_feature_fetching_cancelled"), + startTime); + } + cancelled = true; + } + + int sourcesRemaining = 0; + + private boolean running = false; + + private void setGuiFetchComplete() + { + running = false; + if (!cancelled && af != null) + { + // only update the progress bar if we've completed the fetch normally + af.setProgressBar(MessageManager + .getString("status.das_feature_fetching_complete"), startTime); + } + + if (af != null && af.featureSettings != null) + { + af.featureSettings.discoverAllFeatureData(); + } + + if (getFeatSettings() != null) + { + fsettings.complete(); + } + } + + void featuresAdded(Set seqs) + { + if (af == null) + { + // no gui to update with features. + return; + } + af.getFeatureRenderer().featuresAdded(); + + int start = af.getViewport().getStartSeq(); + int end = af.getViewport().getEndSeq(); + int index; + for (index = start; index < end; index++) + { + for (SequenceI seq : seqs) + { + if (seq == af.getViewport().getAlignment().getSequenceAt(index) + .getDatasetSequence()) + { + af.alignPanel.paintAlignment(true); + index = end; + break; + } + } + } + } + + Object[] nextSequence(jalviewSourceI dasSource, SequenceI seq) + { + if (cancelled) + { + return null; + } + DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs( + seq.getDBRef(), new String[] { + // jalview.datamodel.DBRefSource.PDB, + jalview.datamodel.DBRefSource.UNIPROT, + // jalview.datamodel.DBRefSource.EMBL - not tested on any EMBL coord + // sys sources + }); + // TODO: minimal list of DAS queries to make by querying with untyped ID if + // distinct from any typed IDs + + List ids = new ArrayList(); + List qstring = new ArrayList(); + boolean dasCoordSysFound = false; + + if (uprefs != null) + { + // do any of these ids match the source's coordinate system ? + for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++) + { + + for (COORDINATES csys : dasSource.getVersion().getCOORDINATES()) + { + if (jalview.util.DBRefUtils.isDasCoordinateSystem( + csys.getAuthority(), uprefs[j])) + { + debug("Launched fetcher for coordinate system " + + csys.getAuthority()); + // Will have to pass any mapping information to the fetcher + // - the start/end for the DBRefEntry may not be the same as the + // sequence's start/end + + System.out.println(seq.getName() + " " + + (seq.getDatasetSequence() == null) + " " + + csys.getUri()); + + dasCoordSysFound = true; // break's out of the loop + ids.add(uprefs[j]); + qstring.add(uprefs[j].getAccessionId()); + } + else + { + System.out.println("IGNORE " + csys.getAuthority()); + } + } + } + } + + if (!dasCoordSysFound) + { + String id = null; + // try and use the name as the sequence id + if (seq.getName().indexOf("|") > -1) + { + id = seq.getName().substring(seq.getName().lastIndexOf("|") + 1); + if (id.trim().length() < 4) + { + // hack - we regard a significant ID as being at least 4 + // non-whitespace characters + id = seq.getName().substring(0, seq.getName().lastIndexOf("|")); + if (id.indexOf("|") > -1) + { + id = id.substring(id.lastIndexOf("|") + 1); + } + } + } + else + { + id = seq.getName(); + } + if (id != null) + { + DBRefEntry dbre = new DBRefEntry(); + dbre.setAccessionId(id); + // Should try to call a general feature fetcher that + // queries many sources with name to discover applicable ID references + ids.add(dbre); + qstring.add(dbre.getAccessionId()); + } + } + + return new Object[] { ids, qstring }; + } + + /** + * examine the given sequence feature to determine if it should actually be + * turned into sequence annotation or database cross references rather than a + * simple sequence feature. + * + * @param seq + * the sequence to annotate + * @param f + * the jalview sequence feature generated from the DAS feature + * @param map + * the sequence feature attributes + * @param source + * the source that emitted the feature + * @return true if feature was consumed as another kind of annotation. + */ + protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f, + FEATURE feature, jalviewSourceI source) + { + SequenceI mseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } + if (f.getType() != null) + { + String type = f.getType(); + if (type.equalsIgnoreCase("protein_name")) + { + // parse name onto the alignment sequence or the dataset sequence. + if (seq.getDescription() == null + || seq.getDescription().trim().length() == 0) + { + // could look at the note series to pick out the first long name, for + // the moment just use the whole description string + seq.setDescription(f.getDescription()); + } + if (mseq.getDescription() == null + || mseq.getDescription().trim().length() == 0) + { + // could look at the note series to pick out the first long name, for + // the moment just use the whole description string + mseq.setDescription(f.getDescription()); + } + return true; + } + // check if source has biosapiens or other sequence ontology label + if (type.equalsIgnoreCase("DBXREF") || type.equalsIgnoreCase("DBREF")) + { + // try to parse the accession out + + DBRefEntry dbr = new DBRefEntry(); + dbr.setVersion(source.getTitle()); + StringTokenizer st = new StringTokenizer(f.getDescription(), ":"); + if (st.hasMoreTokens()) + { + dbr.setSource(st.nextToken()); + } + if (st.hasMoreTokens()) + { + dbr.setAccessionId(st.nextToken()); + } + seq.addDBRef(dbr); + + if (f.links != null && f.links.size() > 0) + { + // feature is also appended to enable links to be seen. + // TODO: consider extending dbrefs to have their own links ? + // TODO: new feature: extract dbref links from DAS servers and add the + // URL pattern to the list of DB name associated links in the user's + // preferences ? + // for the moment - just fix up the existing feature so it displays + // correctly. + // f.setType(dbr.getSource()); + // f.setDescription(); + f.setValue("linkonly", Boolean.TRUE); + // f.setDescription(""); + Vector newlinks = new Vector(); + Enumeration it = f.links.elements(); + while (it.hasMoreElements()) + { + String elm; + UrlLink urllink = new UrlLink(elm = (String) it.nextElement()); + if (urllink.isValid()) + { + urllink.setLabel(f.getDescription()); + newlinks.addElement(urllink.toString()); + } + else + { + // couldn't parse the link properly. Keep it anyway - just in + // case. + debug("couldn't parse link string - " + elm); + newlinks.addElement(elm); + } + } + f.links = newlinks; + seq.addSequenceFeature(f); + } + return true; + } + } + return false; + } + + /** + * creates a jalview sequence feature from a das feature document + * + * @param feat + * @return sequence feature object created using dasfeature information + */ + SequenceFeature newSequenceFeature(FEATURE feat, String nickname) + { + if (feat == null) + { + return null; + } + try + { + /** + * Different qNames for a DAS Feature - are string keys to the HashMaps in + * features "METHOD") || qName.equals("TYPE") || qName.equals("START") || + * qName.equals("END") || qName.equals("NOTE") || qName.equals("LINK") || + * qName.equals("SCORE") + */ + String desc = new String(); + if (feat.getNOTE() != null) + { + for (String note : feat.getNOTE()) + { + desc += note; + } + } + + int start = 0, end = 0; + float score = 0f; + + try + { + start = Integer.parseInt(feat.getSTART().toString()); + } catch (Exception ex) + { + } + try + { + end = Integer.parseInt(feat.getEND().toString()); + } catch (Exception ex) + { + } + try + { + Object scr = feat.getSCORE(); + if (scr != null) + { + score = (float) Double.parseDouble(scr.toString()); + + } + } catch (Exception ex) + { + } + + SequenceFeature f = new SequenceFeature( + getTypeString(feat.getTYPE()), desc, start, end, score, + nickname); + + if (feat.getLINK() != null) + { + for (LINK link : feat.getLINK()) + { + // Do not put feature extent in link text for non-positional features + if (f.begin == 0 && f.end == 0) + { + f.addLink(f.getType() + " " + link.getContent() + "|" + + link.getHref()); + } + else + { + f.addLink(f.getType() + " " + f.begin + "_" + f.end + " " + + link.getContent() + "|" + link.getHref()); + } + } + } + + return f; + } catch (Exception e) + { + System.out.println("ERRR " + e); + e.printStackTrace(); + System.out.println("############"); + debug("Failed to parse " + feat.toString(), e); + return null; + } + } + + private String getTypeString(TYPE type) + { + return type.getContent(); + } + + public boolean isRunning() + { + return running; + } + +}