X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=ed87f0536d7704d544364c83cce6a76c340f5ef2;hb=453687279dd54095ec2a92b69cb2210a3ff0d586;hp=e6bae9b1a0c31c5f8057109d188bf69df5072763;hpb=f4766a7bbcfae845fc95923b01fa14ff83d589ff;p=jalview.git diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index e6bae9b..ed87f05 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -31,8 +31,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.util.MapList; -import jalview.ws.SequenceFetcherFactory; -import jalview.ws.seqfetcher.ASequenceFetcher; +import jalview.ws.SequenceFetcher; import java.util.ArrayList; import java.util.Iterator; @@ -99,7 +98,7 @@ public class CrossRef */ public List findXrefSourcesForSequences(boolean dna) { - List sources = new ArrayList(); + List sources = new ArrayList<>(); for (SequenceI seq : fromSeqs) { if (seq != null) @@ -143,15 +142,15 @@ public class CrossRef /* * first find seq's xrefs (dna-to-peptide or peptide-to-dna) */ - DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); + List rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); addXrefsToSources(rfs, sources); if (dataset != null) { /* * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs */ - DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); - List foundSeqs = new ArrayList(); + List lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); + List foundSeqs = new ArrayList<>(); /* * find sequences in the alignment which xref one of these DBRefs @@ -164,7 +163,7 @@ public class CrossRef */ for (SequenceI rs : foundSeqs) { - DBRefEntry[] xrs = DBRefUtils.selectDbRefs(!fromDna, + List xrs = DBRefUtils.selectDbRefs(!fromDna, rs.getDBRefs()); addXrefsToSources(xrs, sources); } @@ -178,7 +177,7 @@ public class CrossRef * @param xrefs * @param sources */ - void addXrefsToSources(DBRefEntry[] xrefs, List sources) + void addXrefsToSources(List xrefs, List sources) { if (xrefs != null) { @@ -218,7 +217,7 @@ public class CrossRef public Alignment findXrefSequences(String source, boolean fromDna) { - rseqs = new ArrayList(); + rseqs = new ArrayList<>(); AlignedCodonFrame cf = new AlignedCodonFrame(); matcher = new SequenceIdMatcher(dataset.getSequences()); @@ -230,18 +229,18 @@ public class CrossRef dss = dss.getDatasetSequence(); } boolean found = false; - DBRefEntry[] xrfs = DBRefUtils.selectDbRefs(!fromDna, + List xrfs = DBRefUtils.selectDbRefs(!fromDna, dss.getDBRefs()); // ENST & ENSP comes in to both Protein and nucleotide, so we need to // filter them // out later. - if ((xrfs == null || xrfs.length == 0) && dataset != null) + if ((xrfs == null || xrfs.size() == 0) && dataset != null) { /* * found no suitable dbrefs on sequence - look for sequences in the * alignment which share a dbref with this one */ - DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, + List lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); /* @@ -370,7 +369,7 @@ public class CrossRef { // do a bit more work - search for sequences with references matching // xrefs on this sequence. - found = searchDataset(fromDna, dss, xref, rseqs, cf, false); + found = searchDataset(fromDna, dss, xref, rseqs, cf, false, DBRefUtils.SEARCH_MODE_FULL); } if (found) { @@ -400,9 +399,8 @@ public class CrossRef } private void retrieveCrossRef(List sourceRefs, SequenceI seq, - DBRefEntry[] xrfs, boolean fromDna, AlignedCodonFrame cf) + List xrfs, boolean fromDna, AlignedCodonFrame cf) { - ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); SequenceI[] retrieved = null; SequenceI dss = seq.getDatasetSequence() == null ? seq : seq.getDatasetSequence(); @@ -418,7 +416,8 @@ public class CrossRef } try { - retrieved = sftch.getSequences(sourceRefs, !fromDna); + retrieved = SequenceFetcher.getInstance() + .getSequences(sourceRefs, !fromDna); } catch (Exception e) { System.err.println( @@ -430,8 +429,8 @@ public class CrossRef if (retrieved != null) { boolean addedXref = false; - List newDsSeqs = new ArrayList(), - doNotAdd = new ArrayList(); + List newDsSeqs = new ArrayList<>(), + doNotAdd = new ArrayList<>(); for (SequenceI retrievedSequence : retrieved) { @@ -483,19 +482,24 @@ public class CrossRef private void removeAlreadyRetrievedSeqs(List sourceRefs, boolean fromDna) { - DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); - for (SequenceI sq : dataset.getSequences()) + List dbrSourceSet = new ArrayList<>(sourceRefs); + List dsSeqs = dataset.getSequences(); + for (int ids = 0, nds = dsSeqs.size(); ids < nds; ids++) { + SequenceI sq = dsSeqs.get(ids); boolean dupeFound = false; // !fromDna means we are looking only for nucleotide sequences, not // protein if (sq.isProtein() == fromDna) { - for (DBRefEntry dbr : sq.getPrimaryDBRefs()) + List sqdbrefs = sq.getPrimaryDBRefs(); + for (int idb = 0, ndb = sqdbrefs.size(); idb < ndb; idb++) { - for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr)) + DBRefEntry dbr = sqdbrefs.get(idb); + List searchrefs = DBRefUtils.searchRefs(dbrSourceSet, dbr, DBRefUtils.SEARCH_MODE_FULL); + for (int isr = 0, nsr = searchrefs.size(); isr < nsr; isr++) { - sourceRefs.remove(found); + sourceRefs.remove(searchrefs.get(isr)); dupeFound = true; } } @@ -503,7 +507,8 @@ public class CrossRef if (dupeFound) { // rebuild the search array from the filtered sourceRefs list - dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); + dbrSourceSet.clear(); + dbrSourceSet.addAll(sourceRefs); } } } @@ -526,11 +531,13 @@ public class CrossRef * sourceSequence */ boolean imported = false; - DBRefEntry[] dbr = retrievedSequence.getDBRefs(); + List dbr = retrievedSequence.getDBRefs(); if (dbr != null) { - for (DBRefEntry dbref : dbr) + for (int ib = 0, nb = dbr.size(); ib < nb; ib++) { + + DBRefEntry dbref = dbr.get(ib); SequenceI matched = findInDataset(dbref); if (matched == sourceSequence) { @@ -542,9 +549,10 @@ public class CrossRef Mapping map = dbref.getMap(); if (map != null) { - if (map.getTo() != null && map.getMap() != null) + SequenceI ms = map.getTo(); + if (ms != null && map.getMap() != null) { - if (map.getTo() == sourceSequence) + if (ms == sourceSequence) { // already called to import once, and most likely this sequence // already imported ! @@ -555,7 +563,7 @@ public class CrossRef /* * sequence is new to dataset, so save a reference so it can be added. */ - newDsSeqs.add(map.getTo()); + newDsSeqs.add(ms); continue; } @@ -567,7 +575,6 @@ public class CrossRef { // compare ms with dss and replace with dss in mapping // if map is congruent - SequenceI ms = map.getTo(); // TODO findInDataset requires exact sequence match but // 'congruent' test is only for the mapped part // maybe not a problem in practice since only ENA provide a @@ -589,7 +596,7 @@ public class CrossRef + matched.getName(); System.out.println(msg); - DBRefEntry[] toRefs = map.getTo().getDBRefs(); + List toRefs = map.getTo().getDBRefs(); if (toRefs != null) { /* @@ -630,10 +637,22 @@ public class CrossRef */ SequenceFeature newFeature = new SequenceFeature(feat) { + // BH 2019.08.15 We must override equalsInterval, not + // equals, because that is part of the IntervalI interface, + // and IntervalStore may need that for proper, faster + // processing. + // But SequenceFeature changes were reverted... @Override public boolean equals(Object o) { - return super.equals(o, true); + return o instanceof SequenceFeature + && equalsWithParent((SequenceFeature) o); + } + + @Override + public boolean equalsWithParent(SequenceFeature sf) + { + return sf != null && equals(sf, true); } }; matched.addSequenceFeature(newFeature); @@ -682,7 +701,7 @@ public class CrossRef { return; } - DBRefEntry[] dbrefs = mapTo.getDBRefs(); + List dbrefs = mapTo.getDBRefs(); if (dbrefs == null) { return; @@ -739,7 +758,7 @@ public class CrossRef { // first check primary refs. List match = DBRefUtils.searchRefs( - seq.getPrimaryDBRefs().toArray(new DBRefEntry[0]), template); + seq.getPrimaryDBRefs(), template, DBRefUtils.SEARCH_MODE_FULL); if (match != null && match.size() == 1 && sameSequence(seq, dss)) { return seq; @@ -812,7 +831,7 @@ public class CrossRef * @param retrieved * @param acf */ - void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs, + void updateDbrefMappings(SequenceI mapFrom, List xrefs, SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) { SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved); @@ -946,7 +965,7 @@ public class CrossRef * @return true if matches were found. */ private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI, - DBRefEntry[] lrfs, List foundSeqs, + List lrfs, List foundSeqs, AlignedCodonFrame cf) { boolean found = false; @@ -954,14 +973,14 @@ public class CrossRef { return false; } - for (int i = 0; i < lrfs.length; i++) + for (int i = 0, n = lrfs.size(); i < n; i++) { - DBRefEntry xref = new DBRefEntry(lrfs[i]); - // add in wildcards - xref.setVersion(null); - xref.setMap(null); - found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, - false); +// DBRefEntry xref = new DBRefEntry(lrfs.get(i)); +// // add in wildcards +// xref.setVersion(null); +// xref.setMap(null); + found |= searchDataset(fromDna, sequenceI, lrfs.get(i), foundSeqs, cf, + false, DBRefUtils.SEARCH_MODE_NO_MAP_NO_VERSION); } return found; } @@ -992,11 +1011,12 @@ public class CrossRef * sequenceI or all the returned sequences (eg a genomic reference * associated with a locus and one or more transcripts) * + * @param mode SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional * @return true if relationship found and sequence added. */ boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf, List foundSeqs, AlignedCodonFrame mappings, - boolean direct) + boolean direct, int mode) { boolean found = false; if (dataset == null) @@ -1008,8 +1028,8 @@ public class CrossRef System.err.println("Empty dataset sequence set - NO VECTOR"); return false; } - List ds; - synchronized (ds = dataset.getSequences()) + List ds = dataset.getSequences(); + synchronized (ds) { for (SequenceI nxt : ds) { @@ -1041,13 +1061,13 @@ public class CrossRef } // look for direct or indirect references in common - DBRefEntry[] poss = nxt.getDBRefs(); + List poss = nxt.getDBRefs(); List cands = null; // todo: indirect specifies we select either direct references to nxt // that match xrf which is indirect to sequenceI, or indirect // references to nxt that match xrf which is direct to sequenceI - cands = DBRefUtils.searchRefs(poss, xrf); + cands = DBRefUtils.searchRefs(poss, xrf, mode); // else // { // poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss);