// first filter in case we are retrieving crossrefs that have already been
// retrieved. this happens for cases where a database record doesn't yield
// protein products for CDS
- DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
- for (SequenceI sq : dataset.getSequences())
- {
- boolean dupeFound = false;
- // !fromDna means we are looking only for nucleotide sequences, not
- // protein
- if (sq.isProtein() == fromDna)
- {
- for (DBRefEntry dbr : sq.getPrimaryDBRefs())
- {
- for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
- {
- sourceRefs.remove(found);
- dupeFound = true;
- }
- }
- }
- if (dupeFound)
- {
- dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
- }
- }
+ removeAlreadyRetrievedSeqs(sourceRefs, fromDna);
if (sourceRefs.size() == 0)
{
// no more work to do! We already had all requested sequence records in
: retrievedSequence.getDatasetSequence();
DBRefEntry[] dbr = retrievedSequence.getDBRefs();
if (dbr != null)
+
+ /**
+  * Removes from sourceRefs every reference that DBRefUtils.searchRefs matches
+  * against a primary reference of a dataset sequence of the selected type.
+  *
+  * @param sourceRefs
+  *          - list of references to filter; matches are removed in place.
+  * @param fromDna
+  *          - if true check only protein sequences, else only nucleotide.
+  */
+ private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,
+         boolean fromDna)
+ {
+   DBRefEntry[] candidates = sourceRefs.toArray(new DBRefEntry[0]);
+   for (SequenceI seq : dataset.getSequences())
+   {
+     if (seq.isProtein() != fromDna)
+     {
+       continue;
+     }
+     boolean removedAny = false;
+     for (DBRefEntry primary : seq.getPrimaryDBRefs())
+     {
+       for (DBRefEntry match : DBRefUtils.searchRefs(candidates, primary))
+       {
+         sourceRefs.remove(match);
+         removedAny = true;
+       }
+     }
+     if (removedAny)
+     {
+       // refresh the array snapshot so later sequences search the reduced list
+       candidates = sourceRefs.toArray(new DBRefEntry[0]);
+     }
+   }
+ }
+
{
for (DBRefEntry dbref : dbr)
{