From d015dda1e582c4654acd7fb7761fb0c0a2dc3573 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 21 Jan 2016 14:34:54 +0000 Subject: [PATCH] JAL-1705 get xrefs only for dna/peptide; code tidy --- src/jalview/analysis/CrossRef.java | 24 ++-- src/jalview/ws/seqfetcher/ASequenceFetcher.java | 147 +++++++++++++---------- 2 files changed, 95 insertions(+), 76 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 95d2396..e96d9d7 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -244,29 +244,30 @@ public class CrossRef } for (int r = 0; xrfs != null && r < xrfs.length; r++) { - if (source != null && !source.equals(xrfs[r].getSource())) + DBRefEntry xref = xrfs[r]; + if (source != null && !source.equals(xref.getSource())) { continue; } - if (xrfs[r].hasMap()) + if (xref.hasMap()) { - if (xrfs[r].getMap().getTo() != null) + if (xref.getMap().getTo() != null) { - SequenceI rsq = new Sequence(xrfs[r].getMap().getTo()); + SequenceI rsq = new Sequence(xref.getMap().getTo()); rseqs.add(rsq); - if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r] + if (xref.getMap().getMap().getFromRatio() != xref .getMap().getMap().getToRatio()) { // get sense of map correct for adding to product alignment. if (dna) { // map is from dna seq to a protein product - cf.addMap(dss, rsq, xrfs[r].getMap().getMap()); + cf.addMap(dss, rsq, xref.getMap().getMap()); } else { // map should be from protein seq to its coding dna - cf.addMap(rsq, dss, xrfs[r].getMap().getMap().getInverse()); + cf.addMap(rsq, dss, xref.getMap().getMap().getInverse()); } } found = true; @@ -278,7 +279,7 @@ public class CrossRef // xrefs on this sequence. if (dataset != null) { - found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); // ,false,!dna); + found |= searchDataset(dss, xref, dataset, rseqs, cf); // ,false,!dna); if (found) { xrfs[r] = null; // we've recovered seqs for this one. @@ -326,9 +327,8 @@ public class CrossRef xrfs = t; try { - retrieved = sftch.getSequences(xrfs); // problem here is we don't - // know which of xrfs - // resulted in which + retrieved = sftch.getSequences(xrfs, !dna); + // problem here is we don't know which of xrfs resulted in which // retrieved element } catch (Exception e) { @@ -401,7 +401,7 @@ public class CrossRef SequenceI[] rsqs = new SequenceI[rseqs.size()]; rseqs.toArray(rsqs); ral = new Alignment(rsqs); - if (cf != null && cf.getProtMappings() != null) + if (cf != null && !cf.isEmpty()) { ral.addCodonFrame(cf); } diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index 1e3ae7a..9e438d3 100644 --- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -26,13 +26,13 @@ import jalview.datamodel.DBRefEntry; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.util.MessageManager; -import jalview.util.QuickSort; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.Enumeration; import java.util.HashSet; import java.util.Hashtable; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Stack; @@ -41,46 +41,72 @@ import java.util.Vector; public class ASequenceFetcher { - /** + /* * set of databases we can retrieve entries from */ - protected Hashtable> FETCHABLEDBS; + protected Hashtable> fetchableDbs; + + /* + * comparator to sort by tier (0/1/2) and name + */ + private Comparator proxyComparator; + /** + * Constructor + */ public ASequenceFetcher() { super(); + + /* + * comparator to sort proxies by tier and name + */ + proxyComparator = new Comparator() + { + @Override + public int compare(DbSourceProxy o1, DbSourceProxy o2) + { + /* + * Tier 0 precedes 1 precedes 2 + */ + int compared = Integer.compare(o1.getTier(), o2.getTier()); + if (compared == 0) + { + // defend against NullPointer - should never happen + String o1Name = o1.getDbName(); + String o2Name = o2.getDbName(); + if (o1Name != null && o2Name != null) + { + compared = o1Name.compareToIgnoreCase(o2Name); + } + } + return compared; + } + }; } /** - * get list of supported Databases + * get array of supported Databases * * @return database source string for each database - only the latest version * of a source db is bound to each source. */ public String[] getSupportedDb() { - if (FETCHABLEDBS == null) + if (fetchableDbs == null) { return null; } - String[] sf = new String[FETCHABLEDBS.size()]; - Enumeration e = FETCHABLEDBS.keys(); - int i = 0; - while (e.hasMoreElements()) - { - sf[i++] = e.nextElement(); - } - ; + String[] sf = fetchableDbs.keySet().toArray( + new String[fetchableDbs.size()]); return sf; } public boolean isFetchable(String source) { - Enumeration e = FETCHABLEDBS.keys(); - while (e.hasMoreElements()) + for (String db : fetchableDbs.keySet()) { - String db = e.nextElement(); - if (source.compareToIgnoreCase(db) == 0) + if (source.equalsIgnoreCase(db)) { return true; } @@ -90,9 +116,16 @@ public class ASequenceFetcher return false; } - public SequenceI[] getSequences(DBRefEntry[] refs) + /** + * Fetch sequences for the given cross-references + * + * @param refs + * @param dna + * if true, only fetch from nucleotide data sources, else peptide + * @return + */ + public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna) { - SequenceI[] ret = null; Vector rseqs = new Vector(); Hashtable> queries = new Hashtable>(); for (int r = 0; r < refs.length; r++) @@ -120,17 +153,20 @@ public class ASequenceFetcher "Don't know how to fetch from this database :" + db)); continue; } - Iterator fetchers = getSourceProxy(db).iterator(); + Stack queriesLeft = new Stack(); - // List queriesFailed = new ArrayList(); queriesLeft.addAll(query); - while (fetchers.hasNext()) + + for (DbSourceProxy fetcher : getSourceProxy(db)) { List queriesMade = new ArrayList(); HashSet queriesFound = new HashSet(); try { - DbSourceProxy fetcher = fetchers.next(); + if (fetcher.isDnaCoding() != dna) + { + continue; // wrong sort of data + } boolean doMultiple = fetcher.getAccessionSeparator() != null; // No separator - no Multiple Queries while (!queriesLeft.isEmpty()) @@ -224,19 +260,19 @@ public class ASequenceFetcher } } } + + SequenceI[] result = null; if (rseqs.size() > 0) { - ret = new SequenceI[rseqs.size()]; - Enumeration sqs = rseqs.elements(); + result = new SequenceI[rseqs.size()]; int si = 0; - while (sqs.hasMoreElements()) + for (SequenceI s : rseqs) { - SequenceI s = sqs.nextElement(); - ret[si++] = s; + result[si++] = s; s.updatePDBIds(); } } - return ret; + return result; } public void reportStdError(String db, List queriesMade, @@ -260,44 +296,27 @@ public class ASequenceFetcher } /** - * Retrieve an instance of the proxy for the given source + * Returns a list of proxies for the given source * * @param db * database source string TODO: add version string/wildcard for * retrieval of specific DB source/version combinations. - * @return an instance of DbSourceProxy for that db. + * @return a list of DbSourceProxy for the db */ public List getSourceProxy(String db) { - List dbs; - Map dblist = FETCHABLEDBS.get(db); + db = DBRefUtils.getCanonicalName(db); + Map dblist = fetchableDbs.get(db); if (dblist == null) { return new ArrayList(); } - ; - if (dblist.size() > 1) - { - DbSourceProxy[] l = dblist.values().toArray(new DbSourceProxy[0]); - int i = 0; - String[] nm = new String[l.length]; - // make sure standard dbs appear first, followed by reference das sources, - // followed by anything else. - for (DbSourceProxy s : l) - { - nm[i++] = "" + s.getTier() + s.getDbName().toLowerCase(); - } - QuickSort.sort(nm, l); - dbs = new ArrayList(); - for (i = l.length - 1; i >= 0; i--) - { - dbs.add(l[i]); - } - } - else - { - dbs = new ArrayList(dblist.values()); - } + + /* + * sort so that primary sources precede secondary + */ + List dbs = new ArrayList(dblist.values()); + Collections.sort(dbs, proxyComparator); return dbs; } @@ -341,15 +360,15 @@ public class ASequenceFetcher { if (proxy != null) { - if (FETCHABLEDBS == null) + if (fetchableDbs == null) { - FETCHABLEDBS = new Hashtable>(); + fetchableDbs = new Hashtable>(); } - Map slist = FETCHABLEDBS.get(proxy + Map slist = fetchableDbs.get(proxy .getDbSource()); if (slist == null) { - FETCHABLEDBS.put(proxy.getDbSource(), + fetchableDbs.put(proxy.getDbSource(), slist = new Hashtable()); } slist.put(proxy.getDbName(), proxy); @@ -372,17 +391,17 @@ public class ASequenceFetcher "error.implementation_error_dbinstance_must_implement_interface", new String[] { class1.toString() })); } - if (FETCHABLEDBS == null) + if (fetchableDbs == null) { return null; } String[] sources = null; Vector src = new Vector(); - Enumeration dbs = FETCHABLEDBS.keys(); + Enumeration dbs = fetchableDbs.keys(); while (dbs.hasMoreElements()) { String dbn = dbs.nextElement(); - for (DbSourceProxy dbp : FETCHABLEDBS.get(dbn).values()) + for (DbSourceProxy dbp : fetchableDbs.get(dbn).values()) { if (class1.isAssignableFrom(dbp.getClass())) { -- 1.7.10.2