From: jprocter Date: Wed, 29 Feb 2012 16:24:50 +0000 (+0000) Subject: move from dasobert to jdas multithreaded feature fetching - and use jalview das api... X-Git-Tag: Jalview_2_9~532^2~31^2~5 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=ffb7b25710b6ff418fdba906a8f30c52c3d5d33c;p=jalview.git move from dasobert to jdas multithreaded feature fetching - and use jalview das api where possible (introduces issue JAL-1051) --- diff --git a/src/jalview/ws/DasSequenceFeatureFetcher.java b/src/jalview/ws/DasSequenceFeatureFetcher.java index 30fe725..1220df7 100644 --- a/src/jalview/ws/DasSequenceFeatureFetcher.java +++ b/src/jalview/ws/DasSequenceFeatureFetcher.java @@ -17,23 +17,41 @@ */ package jalview.ws; -import java.net.*; -import java.util.*; - -import javax.swing.*; - -import org.biodas.jdas.client.SourcesClient; -import org.biodas.jdas.schema.sources.SOURCE; -import org.biodas.jdas.schema.sources.SOURCES; -import org.biojava.dasobert.das.*; -import org.biojava.dasobert.das2.*; -import org.biojava.dasobert.das2.io.*; -import org.biojava.dasobert.dasregistry.*; -import org.biojava.dasobert.eventmodel.*; import jalview.bin.Cache; -import jalview.datamodel.*; -import jalview.gui.*; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignFrame; +import jalview.gui.Desktop; +import jalview.gui.FeatureSettings; import jalview.util.UrlLink; +import jalview.ws.dbsources.das.api.DasSourceRegistryI; +import jalview.ws.dbsources.das.api.jalviewSourceI; + +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.Vector; + +import javax.swing.JOptionPane; + +import org.biodas.jdas.client.adapters.features.DasGFFAdapter; +import org.biodas.jdas.client.adapters.features.DasGFFAdapter.GFFAdapter; +import org.biodas.jdas.client.threads.FeaturesClientMultipleSources; +import org.biodas.jdas.schema.features.ERRORSEGMENT; +import org.biodas.jdas.schema.features.FEATURE; +import org.biodas.jdas.schema.features.LINK; +import org.biodas.jdas.schema.features.SEGMENT; +import org.biodas.jdas.schema.features.TYPE; +import org.biodas.jdas.schema.features.UNKNOWNFEATURE; +import org.biodas.jdas.schema.features.UNKNOWNSEGMENT; +import org.biodas.jdas.schema.sources.COORDINATES; /** * DOCUMENT ME! @@ -51,7 +69,7 @@ public class DasSequenceFeatureFetcher StringBuffer sbuffer = new StringBuffer(); - Vector selectedSources; + List selectedSources; boolean cancelled = false; @@ -78,6 +96,8 @@ public class DasSequenceFeatureFetcher long startTime; + private DasSourceRegistryI sourceRegistry; + /** * Creates a new SequenceFeatureFetcher object. Uses default * @@ -93,18 +113,16 @@ public class DasSequenceFeatureFetcher } public DasSequenceFeatureFetcher(SequenceI[] oursequences, - FeatureSettings fsettings, Vector ourselectedSources, + FeatureSettings fsettings, List selectedSources2, boolean checkDbrefs, boolean promptFetchDbrefs) { - this.selectedSources = new Vector(); - Enumeration sources = ourselectedSources.elements(); + this.selectedSources = new ArrayList(); // filter both sequences and sources to eliminate duplicates - while (sources.hasMoreElements()) + for (jalviewSourceI src : selectedSources2) { - Object src = sources.nextElement(); if (!selectedSources.contains(src)) { - selectedSources.addElement(src); + selectedSources.add(src); } ; } @@ -128,15 +146,13 @@ public class DasSequenceFeatureFetcher af.setShowSeqFeatures(true); } int uniprotCount = 0; - for (int i = 0; i < selectedSources.size(); i++) + for (jalviewSourceI source : selectedSources) { - DasSource source = (DasSource) selectedSources.elementAt(i); - DasCoordinateSystem[] coords = source.getCoordinateSystem(); - for (int c = 0; c < coords.length; c++) + for (COORDINATES coords : source.getVersion().getCOORDINATES()) { // TODO: match UniProt coord system canonically (?) - does // UniProt==uniprot==UNIPROT ? - if (coords[c].getName().indexOf("UniProt") > -1) + if (coords.getAuthority().toLowerCase().equals("uniprot")) { uniprotCount++; break; @@ -185,28 +201,39 @@ public class DasSequenceFeatureFetcher } else { - startFetching(); + _startFetching(); } } else { - startFetching(); + _startFetching(); } } - + private void _startFetching() + { + new Thread(new FetchSeqFeatures()).start(); + } + class FetchSeqFeatures implements Runnable + { + public void run() + { + startFetching(); + setGuiFetchComplete(); + } + } class FetchDBRefs implements Runnable { public void run() { new DBRefFetcher(sequences, af).fetchDBRefs(true); startFetching(); + setGuiFetchComplete(); } } /** - * Spawns a number of dasobert Fetcher threads to add features to sequences in - * the dataset + * Spawns Fetcher threads to add features to sequences in the dataset */ void startFetching() { @@ -216,14 +243,16 @@ public class DasSequenceFeatureFetcher { af.setProgressBar("Fetching DAS Sequence Features", startTime); } - + if (sourceRegistry == null) + { + sourceRegistry = Cache.getDasSourceRegistry(); + } if (selectedSources == null || selectedSources.size() == 0) { try { - DasSource[] sources = new jalview.gui.DasSourceBrowser() - .getDASSource(); - + jalviewSourceI[] sources = sourceRegistry.getSources().toArray( + new jalviewSourceI[0]); String active = jalview.bin.Cache.getDefault("DAS_ACTIVE_SOURCE", "uniprot"); StringTokenizer st = new StringTokenizer(active, "\t"); @@ -234,9 +263,9 @@ public class DasSequenceFeatureFetcher token = st.nextToken(); for (int i = 0; i < sources.length; i++) { - if (sources[i].getNickname().equals(token)) + if (sources[i].getTitle().equals(token)) { - selectedSources.addElement(sources[i]); + selectedSources.add(sources[i]); break; } } @@ -257,14 +286,180 @@ public class DasSequenceFeatureFetcher } sourcesRemaining = selectedSources.size(); + FeaturesClientMultipleSources fc = new FeaturesClientMultipleSources(); + fc.setConnProps(sourceRegistry.getSessionHandler()); // Now sending requests one at a time to each server - for (int sourceIndex = 0; sourceIndex < selectedSources.size() - && !cancelled; sourceIndex++) + ArrayList srcobj = new ArrayList(); + ArrayList src = new ArrayList(); + List> ids = new ArrayList>(); + List> idobj = new ArrayList>(); + List> sqset = new ArrayList>(); + for (jalviewSourceI _sr : selectedSources) { - DasSource dasSource = (DasSource) selectedSources - .elementAt(sourceIndex); - nextSequence(dasSource, sequences[0]); + Map slist = new HashMap(); + List idob = new ArrayList(); + List qset = new ArrayList(); + + for (SequenceI seq : sequences) + { + Object[] idset = nextSequence(_sr, seq); + if (idset != null) + { + List _idob = (List) idset[0]; + List _qset = (List) idset[1]; + if (_idob.size() > 0) + { + // add sequence's ref for each id derived from it + // (space inefficient, but most unambiguous) + // could replace with hash with _qset values as keys. + Iterator dbobj = _idob.iterator(); + for (String q : _qset) + { + SequenceI osq = slist.get(q); + DBRefEntry dr = dbobj.next(); + if (osq != null && osq != seq) + { + // skip - non-canonical query + } + else + { + idob.add(dr); + qset.add(q); + slist.put(q, seq); + } + } + } + } + } + if (idob.size() > 0) + { + srcobj.add(_sr); + src.add(_sr.getSourceURL()); + ids.add(qset); + idobj.add(idob); + sqset.add(slist); + } + } + Map, Exception>> errors = new HashMap, Exception>>(); + Map, DasGFFAdapter>> results = new HashMap, DasGFFAdapter>>(); + fc.fetchData(src, ids, false, results, errors); + fc.shutDown(); + while (!fc.isTerminated()) + { + try + { + Thread.sleep(200); + } catch (InterruptedException x) + { + + } + } + Iterator> idset = ids.iterator(); + Iterator> idobjset = idobj.iterator(); + Iterator> seqset = sqset.iterator(); + for (jalviewSourceI source : srcobj) + { + processResponse(seqset.next(), source, idset.next(), idobjset.next(), + results.get(source.getSourceURL()), + errors.get(source.getSourceURL())); + } + } + + private void processResponse(Map sequencemap, + jalviewSourceI jvsource, List ids, + List idobj, Map, DasGFFAdapter> results, + Map, Exception> errors) + { + Set sequences = new HashSet(); + String source = jvsource.getSourceURL(); + // process features + DasGFFAdapter result = (results == null) ? null : results.get(ids); + Exception error = (errors == null) ? null : errors.get(ids); + if (result == null) + { + debug("das source " + source + " could not be contacted. " + + (error == null ? "" : error.toString())); + } + else + { + + GFFAdapter gff = result.getGFF(); + List segments = gff.getSegments(); + List errorsegs = gff.getErrorSegments(); + List unkfeats = gff.getUnknownFeatures(); + List unksegs = gff.getUnknownSegments(); + debug("das source " + source + " returned " + gff.getTotal() + + " responses. " + (errorsegs != null ? errorsegs.size() : 0) + + " were incorrect segment queries, " + + (unkfeats != null ? unkfeats.size() : 0) + + " were unknown features " + + (unksegs != null ? unksegs.size() : 0) + + " were unknown segments and " + + (segments != null ? segments.size() : 0) + + " were segment responses."); + Iterator dbr = idobj.iterator(); + if (segments != null) + { + for (SEGMENT seg : segments) + { + String id = seg.getId(); + DBRefEntry dbref = idobj.get(ids.indexOf(id)); + SequenceI sequence = sequencemap.get(id); + boolean added = false; + sequences.add(sequence); + + for (FEATURE feat : seg.getFEATURE()) + { + // standard DAS feature-> jalview sequence feature transformation + SequenceFeature f = newSequenceFeature(feat, jvsource.getTitle()); + if (!parseSeqFeature(sequence, f, feat, jvsource)) + { + if (dbref.getMap() != null && f.getBegin() > 0 + && f.getEnd() > 0) + { + debug("mapping from " + f.getBegin() + " - " + f.getEnd()); + SequenceFeature vf[] = null; + + try + { + vf = dbref.getMap().locateFeature(f); + } catch (Exception ex) + { + Cache.log + .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org."); + Cache.log.info("Mapping feature from " + f.getBegin() + + " to " + f.getEnd() + " in dbref " + + dbref.getAccessionId() + " in " + + dbref.getSource()); + Cache.log.info("using das Source " + source); + Cache.log.info("Exception", ex); + } + + if (vf != null) + { + for (int v = 0; v < vf.length; v++) + { + debug("mapping to " + v + ": " + vf[v].getBegin() + + " - " + vf[v].getEnd()); + sequence.addSequenceFeature(vf[v]); + } + } + } + else + { + sequence.addSequenceFeature(f); + } + } + } + } + featuresAdded(sequences); + } + else + { + // System.out.println("No features found for " + seq.getName() + // + " from: " + e.getDasSource().getNickname()); + } } } @@ -310,30 +505,6 @@ public class DasSequenceFeatureFetcher int sourcesRemaining = 0; - void responseComplete(DasSource dasSource, SequenceI seq) - { - if (seq != null) - { - for (int seqIndex = 0; seqIndex < sequences.length - 1 && !cancelled; seqIndex++) - { - if (sequences[seqIndex] == seq) - { - nextSequence(dasSource, sequences[++seqIndex]); - return; - } - } - } - - sourcesRemaining--; - - if (sourcesRemaining == 0) - { - System.err.println("Fetching Complete."); - setGuiFetchComplete(); - } - - } - private void setGuiFetchComplete() { @@ -354,7 +525,7 @@ public class DasSequenceFeatureFetcher } } - void featuresAdded(SequenceI seq) + void featuresAdded(Set seqs) { if (af == null) { @@ -368,19 +539,23 @@ public class DasSequenceFeatureFetcher int index; for (index = start; index < end; index++) { - if (seq == af.getViewport().getAlignment().getSequenceAt(index) - .getDatasetSequence()) + for (SequenceI seq : seqs) { - af.alignPanel.paintAlignment(true); - break; + if (seq == af.getViewport().getAlignment().getSequenceAt(index) + .getDatasetSequence()) + { + af.alignPanel.paintAlignment(true); + index = end; + break; + } } } } - void nextSequence(DasSource dasSource, SequenceI seq) + Object[] nextSequence(jalviewSourceI dasSource, SequenceI seq) { if (cancelled) - return; + return null; DBRefEntry[] uprefs = jalview.util.DBRefUtils.selectRefs( seq.getDBRef(), new String[] { @@ -392,6 +567,8 @@ public class DasSequenceFeatureFetcher // TODO: minimal list of DAS queries to make by querying with untyped ID if // distinct from any typed IDs + List ids = new ArrayList(); + List qstring = new ArrayList(); boolean dasCoordSysFound = false; if (uprefs != null) @@ -399,29 +576,28 @@ public class DasSequenceFeatureFetcher // do any of these ids match the source's coordinate system ? for (int j = 0; !dasCoordSysFound && j < uprefs.length; j++) { - DasCoordinateSystem cs[] = dasSource.getCoordinateSystem(); - for (int csIndex = 0; csIndex < cs.length && !dasCoordSysFound; csIndex++) + for (COORDINATES csys : dasSource.getVersion().getCOORDINATES()) { - if (cs.length > 0 - && jalview.util.DBRefUtils.isDasCoordinateSystem( - cs[csIndex].getName(), uprefs[j])) + if (jalview.util.DBRefUtils.isDasCoordinateSystem( + csys.getAuthority(), uprefs[j])) { debug("Launched fetcher for coordinate system " - + cs[0].getName()); + + csys.getAuthority()); // Will have to pass any mapping information to the fetcher // - the start/end for the DBRefEntry may not be the same as the // sequence's start/end System.out.println(seq.getName() + " " + (seq.getDatasetSequence() == null) + " " - + dasSource.getUrl()); + + csys.getUri()); dasCoordSysFound = true; // break's out of the loop - createFeatureFetcher(seq, dasSource, uprefs[j]); + ids.add(uprefs[j]); + qstring.add(uprefs[j].getAccessionId()); } else - System.out.println("IGNORE " + cs[csIndex].getName()); + System.out.println("IGNORE " + csys.getAuthority()); } } } @@ -450,204 +626,17 @@ public class DasSequenceFeatureFetcher } if (id != null) { + DBRefEntry dbre = new DBRefEntry(); + dbre.setAccessionId(id); // Should try to call a general feature fetcher that // queries many sources with name to discover applicable ID references - createFeatureFetcher(seq, dasSource, id); + ids.add(dbre); + qstring.add(dbre.getAccessionId()); } } - } - - /** - * fetch and add das features to a sequence using the given source URL and - * compatible DbRef id. new features are mapped using the DbRef mapping to the - * local coordinate system. - * - * @param seq - * @param SourceUrl - * @param dbref - */ - protected void createFeatureFetcher(final SequenceI seq, - final DasSource dasSource, final DBRefEntry dbref) - { - - // //////////// - // / fetch DAS features - final Das1Source source = new Das1Source(); - source.setUrl(dasSource.getUrl()); - source.setNickname(dasSource.getNickname()); - if (dbref == null || dbref.getAccessionId() == null - || dbref.getAccessionId().length() < 1) - { - responseComplete(dasSource, seq); // reduce thread count anyhow - return; - } - debug("new Das Feature Fetcher for " + dbref.getSource() + ":" - + dbref.getAccessionId() + " querying " + dasSource.getUrl()); - FeatureThread fetcher = new FeatureThread(dbref.getAccessionId() - // + ":" + start + "," + end, - , source); - - fetcher.addFeatureListener(new FeatureListener() - { - public void comeBackLater(FeatureEvent e) - { - responseComplete(dasSource, seq); - debug("das source " + e.getSource().getNickname() - + " asked us to come back in " + e.getComeBackLater() - + " secs."); - } - - public void newFeatures(FeatureEvent e) - { - - Das1Source ds = e.getSource(); - - Map[] features = e.getFeatures(); - // add features to sequence - debug("das source " + ds.getUrl() + " returned " + features.length - + " features"); - - if (features.length > 0) - { - for (int i = 0; i < features.length; i++) - { - // standard DAS feature-> jalview sequence feature transformation - SequenceFeature f = newSequenceFeature(features[i], - source.getNickname()); - if (!parseSeqFeature(seq, f, features[i], source)) - { - if (dbref.getMap() != null && f.getBegin() > 0 - && f.getEnd() > 0) - { - debug("mapping from " + f.getBegin() + " - " + f.getEnd()); - SequenceFeature vf[] = null; - - try - { - vf = dbref.getMap().locateFeature(f); - } catch (Exception ex) - { - Cache.log - .info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to jalview-discuss@jalview.org."); - Cache.log.info("Mapping feature from " + f.getBegin() - + " to " + f.getEnd() + " in dbref " - + dbref.getAccessionId() + " in " - + dbref.getSource()); - Cache.log.info("using das Source " + ds.getUrl()); - Cache.log.info("Exception", ex); - } - - if (vf != null) - { - for (int v = 0; v < vf.length; v++) - { - debug("mapping to " + v + ": " + vf[v].getBegin() - + " - " + vf[v].getEnd()); - seq.addSequenceFeature(vf[v]); - } - } - } - else - { - seq.addSequenceFeature(f); - } - } - } - featuresAdded(seq); - } - else - { - // System.out.println("No features found for " + seq.getName() - // + " from: " + e.getDasSource().getNickname()); - } - responseComplete(dasSource, seq); - - } - } - - ); - - fetcher.start(); - } - - protected void createFeatureFetcher(final SequenceI seq, - final DasSource dasSource, String id) - { - // //////////// - // / fetch DAS features - final Das1Source source = new Das1Source(); - source.setUrl(dasSource.getUrl()); - source.setNickname(dasSource.getNickname()); - - if (id != null) - { - id = id.trim(); - } - if (id != null && id.length() > 0) - { - debug("new Das Feature Fetcher for " + id + " querying " - + dasSource.getUrl()); - FeatureThread fetcher = new FeatureThread(id - // + ":" + start + "," + end, - , source); - - fetcher.addFeatureListener(new FeatureListener() - { - public void comeBackLater(FeatureEvent e) - { - responseComplete(dasSource, seq); - debug("das source " + e.getSource().getNickname() - + " asked us to come back in " + e.getComeBackLater() - + " secs."); - } - - public void newFeatures(FeatureEvent e) - { - - Das1Source ds = e.getSource(); - - Map[] features = e.getFeatures(); - // add features to sequence - debug("das source " + ds.getUrl() + " returned " - + features.length + " features"); - - if (features.length > 0) - { - for (int i = 0; i < features.length; i++) - { - // standard DAS feature-> jalview sequence feature transformation - SequenceFeature f = newSequenceFeature(features[i], - source.getNickname()); - if (!parseSeqFeature(seq, f, features[i], source)) - { - // just add as a simple sequence feature - seq.addSequenceFeature(f); - } - } - - featuresAdded(seq); - } - else - { - // System.out.println("No features found for " + seq.getName() - // + " from: " + e.getDasSource().getNickname()); - } - responseComplete(dasSource, seq); - - } - } - - ); - - fetcher.start(); - } - else - { - // invalid fetch - indicate it is finished. - debug("Skipping empty ID for querying " + dasSource.getUrl()); - responseComplete(dasSource, seq); - } + return new Object[] + { ids, qstring }; } /** @@ -666,7 +655,7 @@ public class DasSequenceFeatureFetcher * @return true if feature was consumed as another kind of annotation. */ protected boolean parseSeqFeature(SequenceI seq, SequenceFeature f, - Map map, Das1Source source) + FEATURE feature, jalviewSourceI source) { SequenceI mseq = seq; while (seq.getDatasetSequence() != null) @@ -701,7 +690,7 @@ public class DasSequenceFeatureFetcher // try to parse the accession out DBRefEntry dbr = new DBRefEntry(); - dbr.setVersion(source.getNickname()); + dbr.setVersion(source.getTitle()); StringTokenizer st = new StringTokenizer(f.getDescription(), ":"); if (st.hasMoreTokens()) { @@ -757,12 +746,12 @@ public class DasSequenceFeatureFetcher /** * creates a jalview sequence feature from a das feature document * - * @param dasfeature + * @param feat * @return sequence feature object created using dasfeature information */ - SequenceFeature newSequenceFeature(Map dasfeature, String nickname) + SequenceFeature newSequenceFeature(FEATURE feat, String nickname) { - if (dasfeature == null) + if (feat == null) { return null; } @@ -775,9 +764,12 @@ public class DasSequenceFeatureFetcher * qName.equals("SCORE") */ String desc = new String(); - if (dasfeature.containsKey("NOTE")) + if (feat.getNOTE() != null) { - desc += (String) dasfeature.get("NOTE"); + for (String note : feat.getNOTE()) + { + desc += (String) note; + } } int start = 0, end = 0; @@ -785,19 +777,19 @@ public class DasSequenceFeatureFetcher try { - start = Integer.parseInt(dasfeature.get("START").toString()); + start = Integer.parseInt(feat.getSTART().toString()); } catch (Exception ex) { } try { - end = Integer.parseInt(dasfeature.get("END").toString()); + end = Integer.parseInt(feat.getEND().toString()); } catch (Exception ex) { } try { - Object scr = dasfeature.get("SCORE"); + Object scr = feat.getSCORE(); if (scr != null) { score = (float) Double.parseDouble(scr.toString()); @@ -808,20 +800,24 @@ public class DasSequenceFeatureFetcher } SequenceFeature f = new SequenceFeature( - (String) dasfeature.get("TYPE"), desc, start, end, score, + getTypeString(feat.getTYPE()), desc, start, end, score, nickname); - if (dasfeature.containsKey("LINK")) + if (feat.getLINK() != null) { - // Do not put feature extent in link text for non-positional features - if (f.begin == 0 && f.end == 0) - { - f.addLink(f.getType() + "|" + dasfeature.get("LINK")); - } - else + for (LINK link : feat.getLINK()) { - f.addLink(f.getType() + " " + f.begin + "_" + f.end + "|" - + dasfeature.get("LINK")); + // Do not put feature extent in link text for non-positional features + if (f.begin == 0 && f.end == 0) + { + f.addLink(f.getType() + " " + link.getContent() + "|" + + link.getHref()); + } + else + { + f.addLink(f.getType() + " " + f.begin + "_" + f.end + " " + + link.getContent() + "|" + link.getHref()); + } } } @@ -831,52 +827,14 @@ public class DasSequenceFeatureFetcher System.out.println("ERRR " + e); e.printStackTrace(); System.out.println("############"); - debug("Failed to parse " + dasfeature.toString(), e); + debug("Failed to parse " + feat.toString(), e); return null; } } - /** - * query the default DAS Source Registry for sources. Uses value of jalview - * property DAS_REGISTRY_URL and the DasSourceBrowser.DEFAULT_REGISTRY if that - * doesn't exist. - * - * @return list of sources - */ - public static SOURCE[] getDASSources() - { - - String registryURL = jalview.bin.Cache.getDefault("DAS_REGISTRY_URL", - DasSourceBrowser.DEFAULT_REGISTRY); - return getDASSources(registryURL); - } - - /** - * query the given URL for DasSources. - * - * @param registryURL - * return sources from registryURL - */ - public static SOURCE[] getDASSources(String registryURL) + private String getTypeString(TYPE type) { - - try - { - URL url = new URL(registryURL); - org.biodas.jdas.client.SourcesClientInterface client = new SourcesClient(); - - SOURCES sources = client.fetchData(registryURL); - - List dassources = sources.getSOURCE(); - - return dassources.toArray(new SOURCE[dassources.size()]); - } catch (Exception ex) - { - System.err.println("Failed to contact DAS1 registry at " - + registryURL); - ex.printStackTrace(); - return null; - } + return type.getContent(); } }