/*
* first find seq's xrefs (dna-to-peptide or peptide-to-dna)
*/
- DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs());
+ List<DBRefEntry> rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs());
addXrefsToSources(rfs, sources);
if (dataset != null)
{
/*
* find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
*/
- DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs());
+ List<DBRefEntry> lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs());
List<SequenceI> foundSeqs = new ArrayList<>();
/*
*/
for (SequenceI rs : foundSeqs)
{
- DBRefEntry[] xrs = DBRefUtils.selectDbRefs(!fromDna,
+ List<DBRefEntry> xrs = DBRefUtils.selectDbRefs(!fromDna,
rs.getDBRefs());
addXrefsToSources(xrs, sources);
}
* @param xrefs
* @param sources
*/
- void addXrefsToSources(DBRefEntry[] xrefs, List<String> sources)
+ void addXrefsToSources(List<DBRefEntry> xrefs, List<String> sources)
{
if (xrefs != null)
{
dss = dss.getDatasetSequence();
}
boolean found = false;
- DBRefEntry[] xrfs = DBRefUtils.selectDbRefs(!fromDna,
+ List<DBRefEntry> xrfs = DBRefUtils.selectDbRefs(!fromDna,
dss.getDBRefs());
// ENST & ENSP come in to both protein and nucleotide, so we need to
// filter them out later.
- if ((xrfs == null || xrfs.length == 0) && dataset != null)
+ if ((xrfs == null || xrfs.size() == 0) && dataset != null)
{
/*
* found no suitable dbrefs on sequence - look for sequences in the
* alignment which share a dbref with this one
*/
- DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna,
+ List<DBRefEntry> lrfs = DBRefUtils.selectDbRefs(fromDna,
seq.getDBRefs());
/*
{
// do a bit more work - search for sequences with references matching
// xrefs on this sequence.
- found = searchDataset(fromDna, dss, xref, rseqs, cf, false);
+ found = searchDataset(fromDna, dss, xref, rseqs, cf, false, DBRefUtils.SEARCH_MODE_FULL);
}
if (found)
{
}
private void retrieveCrossRef(List<DBRefEntry> sourceRefs, SequenceI seq,
- DBRefEntry[] xrfs, boolean fromDna, AlignedCodonFrame cf)
+ List<DBRefEntry> xrfs, boolean fromDna, AlignedCodonFrame cf)
{
ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
SequenceI[] retrieved = null;
private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,
boolean fromDna)
{
- DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
- for (SequenceI sq : dataset.getSequences())
+ List<DBRefEntry> dbrSourceSet = new ArrayList<DBRefEntry>(sourceRefs);
+ List<SequenceI> dsSeqs = dataset.getSequences();
+ for (int ids = dsSeqs.size(); --ids >= 0;)
{
+ SequenceI sq = dsSeqs.get(ids);
boolean dupeFound = false;
// !fromDna means we are looking only for nucleotide sequences, not
// protein
if (sq.isProtein() == fromDna)
{
- for (DBRefEntry dbr : sq.getPrimaryDBRefs())
+ List<DBRefEntry> sqdbrefs = sq.getPrimaryDBRefs();
+ for (int idb = sqdbrefs.size(); --idb >= 0;)
{
- for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
+ DBRefEntry dbr = sqdbrefs.get(idb);
+ List<DBRefEntry> searchrefs = DBRefUtils.searchRefs(dbrSourceSet, dbr, DBRefUtils.SEARCH_MODE_FULL);
+ for (int isr = searchrefs.size(); --isr >= 0;)
{
- sourceRefs.remove(found);
+ sourceRefs.remove(searchrefs.get(isr));
dupeFound = true;
}
}
if (dupeFound)
{
// rebuild the search array from the filtered sourceRefs list
- dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+ dbrSourceSet.clear();
+ dbrSourceSet.addAll(sourceRefs);
}
}
}
* sourceSequence
*/
boolean imported = false;
- DBRefEntry[] dbr = retrievedSequence.getDBRefs();
+ List<DBRefEntry> dbr = retrievedSequence.getDBRefs();
if (dbr != null)
{
- for (DBRefEntry dbref : dbr)
+ for (int ib = 0, nb = dbr.size(); ib < nb; ib++)
{
+
+ DBRefEntry dbref = dbr.get(ib);
SequenceI matched = findInDataset(dbref);
if (matched == sourceSequence)
{
Mapping map = dbref.getMap();
if (map != null)
{
- if (map.getTo() != null && map.getMap() != null)
+ SequenceI ms = map.getTo();
+ if (ms != null && map.getMap() != null)
{
- if (map.getTo() == sourceSequence)
+ if (ms == sourceSequence)
{
// already called to import once, and most likely this sequence
// already imported !
/*
* sequence is new to dataset, so save a reference so it can be added.
*/
- newDsSeqs.add(map.getTo());
+ newDsSeqs.add(ms);
continue;
}
{
// compare ms with dss and replace with dss in mapping
// if map is congruent
- SequenceI ms = map.getTo();
// TODO findInDataset requires exact sequence match but
// 'congruent' test is only for the mapped part
// maybe not a problem in practice since only ENA provide a
+ matched.getName();
System.out.println(msg);
- DBRefEntry[] toRefs = map.getTo().getDBRefs();
+ List<DBRefEntry> toRefs = map.getTo().getDBRefs();
if (toRefs != null)
{
/*
{
return;
}
- DBRefEntry[] dbrefs = mapTo.getDBRefs();
+ List<DBRefEntry> dbrefs = mapTo.getDBRefs();
if (dbrefs == null)
{
return;
{
// first check primary refs.
List<DBRefEntry> match = DBRefUtils.searchRefs(
- seq.getPrimaryDBRefs().toArray(new DBRefEntry[0]), template);
+ seq.getPrimaryDBRefs(), template, DBRefUtils.SEARCH_MODE_FULL);
if (match != null && match.size() == 1 && sameSequence(seq, dss))
{
return seq;
* @param retrieved
* @param acf
*/
- void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs,
+ void updateDbrefMappings(SequenceI mapFrom, List<DBRefEntry> xrefs,
SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna)
{
SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved);
* @return true if matches were found.
*/
private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,
- DBRefEntry[] lrfs, List<SequenceI> foundSeqs,
+ List<DBRefEntry> lrfs, List<SequenceI> foundSeqs,
AlignedCodonFrame cf)
{
boolean found = false;
{
return false;
}
- for (int i = 0; i < lrfs.length; i++)
+ for (int i = 0, n = lrfs.size(); i < n; i++)
{
- DBRefEntry xref = new DBRefEntry(lrfs[i]);
- // add in wildcards
- xref.setVersion(null);
- xref.setMap(null);
- found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf,
- false);
+// DBRefEntry xref = new DBRefEntry(lrfs.get(i));
+// // add in wildcards
+// xref.setVersion(null);
+// xref.setMap(null);
+ found |= searchDataset(fromDna, sequenceI, lrfs.get(i), foundSeqs, cf,
+ false, DBRefUtils.SEARCH_MODE_NO_MAP_NO_VERSION);
}
return found;
}
* sequenceI or all the returned sequences (eg a genomic reference
* associated with a locus and one or more transcripts)</li>
* </ul>
+ * @param mode DBRefUtils.SEARCH_MODE_FULL to match xrf as given;
+ *          DBRefUtils.SEARCH_MODE_NO_MAP_NO_VERSION to ignore xrf's version and map when matching
* @return true if relationship found and sequence added.
*/
boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,
List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
- boolean direct)
+ boolean direct, int mode)
{
boolean found = false;
if (dataset == null)
}
// look for direct or indirect references in common
- DBRefEntry[] poss = nxt.getDBRefs();
+ List<DBRefEntry> poss = nxt.getDBRefs();
List<DBRefEntry> cands = null;
// todo: indirect specifies we select either direct references to nxt
// that match xrf which is indirect to sequenceI, or indirect
// references to nxt that match xrf which is direct to sequenceI
- cands = DBRefUtils.searchRefs(poss, xrf);
+ cands = DBRefUtils.searchRefs(poss, xrf, mode);
// else
// {
// poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss);