From a7591fdadf3edeb556d1417967787a5bfdb1da32 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Sat, 1 Oct 2016 13:00:34 +0100 Subject: [PATCH] JAL-2210 refactor code to remove from a list of dbrefs those which match primary dbrefs for sequences in the dataset --- src/jalview/analysis/CrossRef.java | 60 +++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 4a7a41b..05814c2 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -401,28 +401,7 @@ public class CrossRef // first filter in case we are retrieving crossrefs that have already been // retrieved. this happens for cases where a database record doesn't yield // protein products for CDS - DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); - for (SequenceI sq : dataset.getSequences()) - { - boolean dupeFound = false; - // !fromDna means we are looking only for nucleotide sequences, not - // protein - if (sq.isProtein() == fromDna) - { - for (DBRefEntry dbr : sq.getPrimaryDBRefs()) - { - for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr)) - { - sourceRefs.remove(found); - dupeFound = true; - } - } - } - if (dupeFound) - { - dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); - } - } + removeAlreadyRetrievedSeqs(sourceRefs, fromDna); if (sourceRefs.size() == 0) { // no more work to do! We already had all requested sequence records in @@ -451,6 +430,43 @@ public class CrossRef : retrievedSequence.getDatasetSequence(); DBRefEntry[] dbr = retrievedSequence.getDBRefs(); if (dbr != null) + + /** + * Search dataset for sequences with a primary reference contained in + * sourceRefs. + * + * @param sourceRefs + * - list of references to filter. + * @param fromDna + * - type of sequence to search for matching primary reference. 
+ */ + private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs, + boolean fromDna) + { + DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); + for (SequenceI sq : dataset.getSequences()) + { + boolean dupeFound = false; + // !fromDna means we are looking only for nucleotide sequences, not + // protein + if (sq.isProtein() == fromDna) + { + for (DBRefEntry dbr : sq.getPrimaryDBRefs()) + { + for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr)) + { + sourceRefs.remove(found); + dupeFound = true; + } + } + } + if (dupeFound) + { + dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); + } + } + } + { for (DBRefEntry dbref : dbr) { -- 1.7.10.2