+ SequenceI retrievedDss = retrievedSequence
+ .getDatasetSequence() == null ? retrievedSequence
+ : retrievedSequence.getDatasetSequence();
+ addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss,
+ retrievedDss);
+ }
+ // JBPNote: What assumptions are made for dbref structures on
+ // retrieved sequences ?
+ // addedXref will be true means importCrossRefSeq found
+ // sequences with dbrefs with mappings to sequences congruent with dss
+
+ if (!addedXref)
+ {
+ // try again, after looking for matching IDs
+ // shouldn't need to do this unless the dbref mechanism has broken.
+ updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna);
+ for (SequenceI retrievedSequence : retrieved)
+ {
+ // dataset gets contaminated ccwith non-ds sequences. why ??!
+ // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
+ SequenceI retrievedDss = retrievedSequence
+ .getDatasetSequence() == null ? retrievedSequence
+ : retrievedSequence.getDatasetSequence();
+ addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss,
+ retrievedDss);
+ }
+ }
+ for (SequenceI newToSeq : newDsSeqs)
+ {
+ if (!doNotAdd.contains(newToSeq)
+ && dataset.findIndex(newToSeq) == -1)
+ {
+ dataset.addSequence(newToSeq);
+ matcher.add(newToSeq);
+ }
+ }
+ }
+ }
+
+ /**
+ * Search dataset for sequences with a primary reference contained in
+ * sourceRefs.
+ *
+ * @param sourceRefs
+ * - list of references to filter.
+ * @param fromDna
+ * - type of sequence to search for matching primary reference.
+ */
+ private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,
+ boolean fromDna)
+ {
+ List<DBRefEntry> dbrSourceSet = new ArrayList<>(sourceRefs);
+ List<SequenceI> dsSeqs = dataset.getSequences();
+ for (int ids = 0, nds = dsSeqs.size(); ids < nds; ids++)
+ {
+ SequenceI sq = dsSeqs.get(ids);
+ boolean dupeFound = false;
+ // !fromDna means we are looking only for nucleotide sequences, not
+ // protein
+ if (sq.isProtein() == fromDna)
+ {
+ List<DBRefEntry> sqdbrefs = sq.getPrimaryDBRefs();
+ for (int idb = 0, ndb = sqdbrefs.size(); idb < ndb; idb++)
+ {
+ DBRefEntry dbr = sqdbrefs.get(idb);
+ List<DBRefEntry> searchrefs = DBRefUtils.searchRefs(dbrSourceSet,
+ dbr, DBRefUtils.SEARCH_MODE_FULL);
+ for (int isr = 0, nsr = searchrefs.size(); isr < nsr; isr++)
+ {
+ sourceRefs.remove(searchrefs.get(isr));
+ dupeFound = true;
+ }
+ }
+ }
+ if (dupeFound)
+ {
+ // rebuild the search array from the filtered sourceRefs list
+ dbrSourceSet.clear();
+ dbrSourceSet.addAll(sourceRefs);
+ }
+ }
+ }
+
+ /**
+ * process sequence retrieved via a dbref on source sequence to resolve and
+ * transfer data JBPNote: as of 2022-02-03 - this assumes retrievedSequence
+ * has dbRefs with Mapping references to a sequence congruent with
+ * sourceSequence
+ *
+ * @param cf
+ * @param sourceSequence
+ * @param retrievedSequence
+ * @return true if retrieveSequence was imported
+ */
+ private boolean importCrossRefSeq(AlignedCodonFrame cf,
+ List<SequenceI> newDsSeqs, List<SequenceI> doNotAdd,
+ SequenceI sourceSequence, SequenceI retrievedSequence)
+ {
+ /**
+ * set when retrievedSequence has been verified as a crossreference for
+ * sourceSequence
+ */
+ boolean imported = false;
+ List<DBRefEntry> dbr = retrievedSequence.getDBRefs();
+ if (dbr != null)
+ {
+ for (int ib = 0, nb = dbr.size(); ib < nb; ib++)
+ {
+
+ DBRefEntry dbref = dbr.get(ib);
+ // matched will return null if the dbref has no map
+ SequenceI matched = findInDataset(dbref);
+ if (matched == sourceSequence)