X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=87f854a4fc9eb764886f999dcc82d14173fc5d80;hb=dc82563dc0d6752da31ade9031035854e5c33409;hp=4f01cea243c05bf3bf922276190f0e25db6e1d7c;hpb=26b115b0a77d521da92a06572d9b7819c2d0d49a;p=jalview.git diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 4f01cea..87f854a 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -20,6 +20,10 @@ */ package jalview.analysis; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; @@ -31,12 +35,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.util.MapList; -import jalview.ws.SequenceFetcherFactory; -import jalview.ws.seqfetcher.ASequenceFetcher; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import jalview.ws.SequenceFetcher; /** * Functions for cross-referencing sequence databases. @@ -143,14 +142,16 @@ public class CrossRef /* * first find seq's xrefs (dna-to-peptide or peptide-to-dna) */ - List rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); + List rfs = DBRefUtils.selectDbRefs(!fromDna, + seq.getDBRefs()); addXrefsToSources(rfs, sources); if (dataset != null) { /* * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs */ - List lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); + List lrfs = DBRefUtils.selectDbRefs(fromDna, + seq.getDBRefs()); List foundSeqs = new ArrayList<>(); /* @@ -370,7 +371,8 @@ public class CrossRef { // do a bit more work - search for sequences with references matching // xrefs on this sequence. - found = searchDataset(fromDna, dss, xref, rseqs, cf, false, DBRefUtils.SEARCH_MODE_FULL); + found = searchDataset(fromDna, dss, xref, rseqs, cf, false, + DBRefUtils.SEARCH_MODE_FULL); } if (found) { @@ -402,7 +404,6 @@ public class CrossRef private void retrieveCrossRef(List sourceRefs, SequenceI seq, List xrfs, boolean fromDna, AlignedCodonFrame cf) { - ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); SequenceI[] retrieved = null; SequenceI dss = seq.getDatasetSequence() == null ? seq : seq.getDatasetSequence(); @@ -418,7 +419,7 @@ public class CrossRef } try { - retrieved = sftch.getSequences(sourceRefs, !fromDna); + retrieved = SequenceFetcher.getInstance().getSequences(sourceRefs, !fromDna); } catch (Exception e) { System.err.println( @@ -443,6 +444,11 @@ public class CrossRef addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, retrievedDss); } + // JBPNote: What assumptions are made for dbref structures on + // retrieved sequences ? + // addedXref will be true means importCrossRefSeq found + // sequences with dbrefs with mappings to sequences congruent with dss + if (!addedXref) { // try again, after looking for matching IDs @@ -483,9 +489,9 @@ public class CrossRef private void removeAlreadyRetrievedSeqs(List sourceRefs, boolean fromDna) { - List dbrSourceSet = new ArrayList(sourceRefs); + List dbrSourceSet = new ArrayList<>(sourceRefs); List dsSeqs = dataset.getSequences(); - for (int ids = dsSeqs.size(); --ids >= 0;) + for (int ids = 0, nds = dsSeqs.size(); ids < nds; ids++) { SequenceI sq = dsSeqs.get(ids); boolean dupeFound = false; @@ -493,12 +499,13 @@ public class CrossRef // protein if (sq.isProtein() == fromDna) { - List sqdbrefs = sq.getPrimaryDBRefs(); - for (int idb = sqdbrefs.size(); --idb >= 0;) + List sqdbrefs = sq.getPrimaryDBRefs(); + for (int idb = 0, ndb = sqdbrefs.size(); idb < ndb; idb++) { - DBRefEntry dbr = sqdbrefs.get(idb); - List searchrefs = DBRefUtils.searchRefs(dbrSourceSet, dbr, DBRefUtils.SEARCH_MODE_FULL); - for (int isr = searchrefs.size(); --isr >= 0;) + DBRefEntry dbr = sqdbrefs.get(idb); + List searchrefs = DBRefUtils.searchRefs(dbrSourceSet, + dbr, DBRefUtils.SEARCH_MODE_FULL); + for (int isr = 0, nsr = searchrefs.size(); isr < nsr; isr++) { sourceRefs.remove(searchrefs.get(isr)); dupeFound = true; @@ -516,7 +523,9 @@ public class CrossRef /** * process sequence retrieved via a dbref on source sequence to resolve and - * transfer data + * transfer data JBPNote: as of 2022-02-03 - this assumes retrievedSequence + * has dbRefs with Mapping references to a sequence congruent with + * sourceSequence * * @param cf * @param sourceSequence @@ -535,10 +544,11 @@ public class CrossRef List dbr = retrievedSequence.getDBRefs(); if (dbr != null) { - for (int ib = 0, nb = dbr.size(); ib < nb; ib++) + for (int ib = 0, nb = dbr.size(); ib < nb; ib++) { - DBRefEntry dbref = dbr.get(ib); + DBRefEntry dbref = dbr.get(ib); + // matched will return null if the dbref has no map SequenceI matched = findInDataset(dbref); if (matched == sourceSequence) { @@ -550,7 +560,7 @@ public class CrossRef Mapping map = dbref.getMap(); if (map != null) { - SequenceI ms = map.getTo(); + SequenceI ms = map.getTo(); if (ms != null && map.getMap() != null) { if (ms == sourceSequence) @@ -715,7 +725,8 @@ public class CrossRef /** * Returns null or the first sequence in the dataset which is identical to * xref.mapTo, and has a) a primary dbref matching xref, or if none found, the - * first one with an ID source|xrefacc + * first one with an ID source|xrefacc JBPNote: Could refactor this to + * AlignmentI/DatasetI * * @param xref * with map and mapped-to sequence @@ -746,8 +757,8 @@ public class CrossRef for (SequenceI seq : dataset.getSequences()) { // first check primary refs. - List match = DBRefUtils.searchRefs( - seq.getPrimaryDBRefs(), template, DBRefUtils.SEARCH_MODE_FULL); + List match = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(), + template, DBRefUtils.SEARCH_MODE_FULL); if (match != null && match.size() == 1 && sameSequence(seq, dss)) { return seq; @@ -813,7 +824,8 @@ public class CrossRef /** * Updates any empty mappings in the cross-references with one to a compatible * retrieved sequence if found, and adds any new mappings to the - * AlignedCodonFrame + * AlignedCodonFrame JBPNote: TODO: this relies on sequence IDs like + * UNIPROT|ACCESSION - which do not always happen. * * @param mapFrom * @param xrefs @@ -929,7 +941,7 @@ public class CrossRef if (fromDna) { - AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); + // AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); mappings.addMap(mapFrom, mapTo, mapping); } else @@ -964,10 +976,10 @@ public class CrossRef } for (int i = 0, n = lrfs.size(); i < n; i++) { -// DBRefEntry xref = new DBRefEntry(lrfs.get(i)); -// // add in wildcards -// xref.setVersion(null); -// xref.setMap(null); + // DBRefEntry xref = new DBRefEntry(lrfs.get(i)); + // // add in wildcards + // xref.setVersion(null); + // xref.setMap(null); found |= searchDataset(fromDna, sequenceI, lrfs.get(i), foundSeqs, cf, false, DBRefUtils.SEARCH_MODE_NO_MAP_NO_VERSION); } @@ -1000,7 +1012,8 @@ public class CrossRef * sequenceI or all the returned sequences (eg a genomic reference * associated with a locus and one or more transcripts) * - * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional + * @param mode + * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional * @return true if relationship found and sequence added. */ boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,