import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
-import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
import jalview.ws.seqfetcher.ASequenceFetcher;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
-import java.util.Vector;
/**
* Functions for cross-referencing sequence databases. user must first specify
}
/**
- * Select just the DNA or protein references for a protein or dna sequence
- *
- * @param fromDna
- * if true, select references from DNA (i.e. Protein databases), else
- * DNA database references
- * @param refs
- * a set of references to select from
- * @return
- */
- public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs)
- {
- return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS
- : DBRefSource.DNACODINGDBS);
- // could attempt to find other cross
- // refs here - ie PDB xrefs
- // (not dna, not protein seq)
- }
-
- /**
- * @param dna
- * true if seqs are DNA seqs
- * @param seqs
- * @return a list of sequence database cross reference source types
- */
- public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs)
- {
- return findSequenceXrefTypes(dna, seqs, null);
- }
- /**
- * Indirect references are references from other sequences from the dataset to
- * any of the direct DBRefEntrys on the given sequences.
+ * Returns a list of distinct database sources for which sequences have either
+ * <ul>
+ * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+ * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+ * reference from another sequence in the dataset which has a cross-reference
+ * to a direct DBRefEntry on the given sequence</li>
+ * </ul>
*
* @param dna
- * true if seqs are DNA seqs
+ * true if seqs are nucleotide
* @param seqs
- * @return a list of sequence database cross reference source types
+ * sequences whose xrefs we are seeking
+ * @param dataset
+ * an alignment to search for indirect references
+ * @return
*/
- public static String[] findSequenceXrefTypes(boolean dna,
+ public static List<String> findXrefSourcesForSequences(boolean dna,
SequenceI[] seqs, AlignmentI dataset)
{
- String[] dbrefs = null;
- List<String> refs = new ArrayList<String>();
+ List<String> sources = new ArrayList<String>();
for (SequenceI seq : seqs)
{
if (seq != null)
{
- SequenceI dss = seq;
- while (dss.getDatasetSequence() != null)
- {
- dss = dss.getDatasetSequence();
- }
- DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs());
- if (rfs != null)
- {
- for (DBRefEntry ref : rfs)
- {
- if (!refs.contains(ref.getSource()))
- {
- refs.add(ref.getSource());
- }
- }
- }
- if (dataset != null)
- {
- // search for references to this sequence's direct references.
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
- List<SequenceI> rseqs = new ArrayList<SequenceI>();
- CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
- null); // don't need to specify codon frame for mapping here
- for (SequenceI rs : rseqs)
- {
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs());
- if (xrs != null)
- {
- for (DBRefEntry ref : xrs)
- {
- if (!refs.contains(ref.getSource()))
- {
- refs.add(ref.getSource());
- }
- }
- }
- // looks like copy and paste - change rfs to xrs?
- // for (int r = 0; rfs != null && r < rfs.length; r++)
- // {
- // if (!refs.contains(rfs[r].getSource()))
- // {
- // refs.add(rfs[r].getSource());
- // }
- // }
- }
- }
+ findXrefSourcesForSequence(seq, dna, dataset, sources);
}
}
- if (refs.size() > 0)
- {
- dbrefs = new String[refs.size()];
- refs.toArray(dbrefs);
- }
- return dbrefs;
+ return sources;
}
- public static boolean hasCdnaMap(SequenceI[] seqs)
+ /**
+ * Returns a list of distinct database sources for which a sequence has either
+ * <ul>
+ * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+ * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+ * reference from another sequence in the dataset which has a cross-reference
+ * to a direct DBRefEntry on the given sequence</li>
+ * </ul>
+ *
+ * @param seq
+ * the sequence whose dbrefs we are searching against
+ * @param dna
+ * true if the sequence is nucleotide
+ * @param dataset
+ * an alignment to search for indirect references
+ * @param sources
+ * a list of sources to add matches to
+ */
+ static void findXrefSourcesForSequence(SequenceI seq, boolean dna,
+ AlignmentI dataset, List<String> sources)
{
- // TODO unused - remove?
- String[] reftypes = findSequenceXrefTypes(false, seqs);
- for (int s = 0; s < reftypes.length; s++)
+ /*
+ * first find seq's xrefs (dna-to-peptide or peptide-to-dna)
+ */
+ DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!dna, seq.getDBRefs());
+ addXrefsToSources(rfs, sources);
+ if (dataset != null)
{
- if (reftypes.equals(DBRefSource.EMBLCDS))
+ /*
+ * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
+ */
+ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(dna, seq.getDBRefs());
+ List<SequenceI> rseqs = new ArrayList<SequenceI>();
+
+ /*
+ * find sequences in the alignment which xref one of these DBRefs
+ * i.e. is xref-ed to a common sequence identifier
+ */
+ CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs, null);
+
+ /*
+ * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources
+ */
+ for (SequenceI rs : rseqs)
{
- return true;
- // no map
+ DBRefEntry[] xrs = DBRefUtils.selectDbRefs(!dna, rs.getDBRefs());
+ addXrefsToSources(xrs, sources);
}
}
- return false;
}
- public static SequenceI[] getCdnaMap(SequenceI[] seqs)
+ /**
+ * Helper method that adds the source identifiers of some cross-references to
+ * a (non-redundant) list of database sources
+ *
+ * @param xrefs
+ * @param sources
+ */
+ static void addXrefsToSources(DBRefEntry[] xrefs, List<String> sources)
{
- // TODO unused - remove?
- Vector cseqs = new Vector();
- for (int s = 0; s < seqs.length; s++)
+ if (xrefs != null)
{
- DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRefs());
- for (int c = 0; c < cdna.length; c++)
+ for (DBRefEntry ref : xrefs)
{
- if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
+ String source = ref.getSource();
+ if (!sources.contains(source))
{
- System.err
- .println("TODO: unimplemented sequence retrieval for coding region sequence.");
- // TODO: retrieve CDS dataset sequences
- // need global dataset sequence retriever/resolver to reuse refs
- // and construct Mapping entry.
- // insert gaps in CDS according to peptide gaps.
- // add gapped sequence to cseqs
+ sources.add(source);
}
}
}
- if (cseqs.size() > 0)
- {
- SequenceI[] rsqs = new SequenceI[cseqs.size()];
- cseqs.copyInto(rsqs);
- return rsqs;
- }
- return null;
-
}
/**
dss = dss.getDatasetSequence();
}
boolean found = false;
- DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRefs());
+ DBRefEntry[] xrfs = DBRefUtils.selectDbRefs(!dna, dss.getDBRefs());
if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
- System.out.println("Attempting to find ds Xrefs refs.");
- // FIXME should be dss not seq here?
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
- // less ambiguous would be a 'find primary dbRefEntry' method.
- // filter for desired source xref here
+ /*
+ * found no suitable dbrefs on sequence - look for sequences in the
+ * alignment which share a dbref with this one
+ */
+ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(dna, seq.getDBRefs());
+
+ /*
+ * find sequences (except this one!), of complementary type,
+ * which have a dbref to an accession id for this sequence,
+ * and add them to the results
+ */
found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
rseqs, cf);
}
{
if (xref.getMap().getTo() != null)
{
+ found = true;
SequenceI rsq = new Sequence(xref.getMap().getTo());
rseqs.add(rsq);
if (xref.getMap().getMap().getFromRatio() != xref
cf.addMap(rsq, dss, xref.getMap().getMap().getInverse());
}
}
- found = true;
}
}
if (!found)
// xrefs on this sequence.
if (dataset != null)
{
- found |= searchDataset(dss, xref, dataset, rseqs, cf, false,
+ found = searchDataset(dss, xref, dataset, rseqs, cf, false,/*true?*/
!dna);
if (found)
{
{
if (xrfs != null && xrfs.length > 0)
{
- // Try and get the sequence reference...
- /*
- * Ideal world - we ask for a sequence fetcher implementation here if
- * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) (
- */
- ASequenceFetcher sftch = new SequenceFetcher();
+ ASequenceFetcher sftch = SequenceFetcherFactory
+ .getSequenceFetcher();
SequenceI[] retrieved = null;
int l = xrfs.length;
for (int r = 0; r < xrfs.length; r++)
xrfs = t;
try
{
- retrieved = sftch.getSequences(xrfs, !dna);
+ retrieved = sftch.getSequences(Arrays.asList(xrfs), !dna);
// problem here is we don't know which of xrfs resulted in which
// retrieved element
} catch (Exception e)
cf.addMap(retrieved[rs].getDatasetSequence(),
dss, map.getMap());
}
+ // TODO remove this 'else' and the cf.addMap above?
else
{
cf.addMap(retrieved[rs].getDatasetSequence(),
// add in wildcards
xref.setVersion(null);
xref.setMap(null);
- found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna);
+ found |= searchDataset(sequenceI, xref, dataset, rseqs, cf, false,
+ dna);
}
return found;
}
/**
- * search a given sequence dataset for references matching cross-references to
- * the given sequence
+ * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the
+ * associated sequence to rseqs
*
* @param sequenceI
+ * a sequence to ignore (start point of search)
* @param xrf
+ * a cross-reference to try to match
* @param dataset
+ * sequences to search in
* @param rseqs
- * set of unique sequences
+ * result list to add to
* @param cf
- * @return true if one or more unique sequences were found and added
- */
- public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
- AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf)
- {
- return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);
- }
-
- /**
- * TODO: generalise to different protein classifications Search dataset for
- * DBRefEntrys matching the given one (xrf) and add the associated sequence to
- * rseq.
- *
- * @param sequenceI
- * @param xrf
- * @param dataset
- * @param rseqs
+ * a set of sequence mappings to add to
* @param direct
* - search all references or only subset
* @param dna
boolean direct, boolean dna)
{
boolean found = false;
- SequenceI[] typer = new SequenceI[1];
if (dataset == null)
{
return false;
System.err
.println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
}
- if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())
+ if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence())
{
- // check if this is the correct sequence type
+ continue;
+ }
+ // check if this is the correct sequence type
+ {
+ // TODO 'direct' is always set to false - remove?
+ // or should it be 'true' from findXrefSequences?
+ // also its Javadoc conflicts with its use:
+ // test below implies 'direct' means find complementary sequences,
+ // !direct means select same molecule type
+ boolean isDna = Comparison
+ .isNucleotide(new SequenceI[] { nxt });
+ if ((direct && isDna == dna) || (!direct && isDna != dna))
{
- typer[0] = nxt;
- boolean isDna = jalview.util.Comparison.isNucleotide(typer);
- if ((direct && isDna == dna) || (!direct && isDna != dna))
- {
- // skip this sequence because it is same molecule type
- continue;
- }
+ // skip this sequence because it is wrong molecule type
+ continue;
}
+ }
- // look for direct or indirect references in common
- DBRefEntry[] poss = nxt.getDBRefs(), cands = null;
- if (direct)
- {
- cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
- }
- else
- {
- poss = CrossRef.findXDbRefs(dna, poss); //
- cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
- }
- if (cands != null)
+ // look for direct or indirect references in common
+ DBRefEntry[] poss = nxt.getDBRefs();
+ List<DBRefEntry> cands = null;
+ /*
+ * TODO does this make any sense?
+ * if 'direct', search the dbrefs for xrf
+ * else, filter the dbrefs by type and then search for xrf
+ * - the result is the same isn't it?
+ */
+ if (direct)
+ {
+ cands = DBRefUtils.searchRefs(poss, xrf);
+ }
+ else
+ {
+ poss = DBRefUtils.selectDbRefs(!dna, poss);
+ cands = DBRefUtils.searchRefs(poss, xrf);
+ }
+ if (!cands.isEmpty())
+ {
+ if (!rseqs.contains(nxt))
{
- if (!rseqs.contains(nxt))
+ found = true;
+ rseqs.add(nxt);
+ if (cf != null)
{
- rseqs.add(nxt);
- boolean foundmap = cf != null;
// don't search if we aren't given a codon map object
- for (int r = 0; foundmap && r < cands.length; r++)
+ for (DBRefEntry candidate : cands)
{
- if (cands[r].hasMap())
+ Mapping mapping = candidate.getMap();
+ if (mapping != null)
{
- if (cands[r].getMap().getTo() != null
- && cands[r].getMap().getMap().getFromRatio() != cands[r]
- .getMap().getMap().getToRatio())
+ MapList map = mapping.getMap();
+ if (mapping.getTo() != null
+ && map.getFromRatio() != map.getToRatio())
{
- foundmap = true;
// get sense of map correct for adding to product
// alignment.
if (dna)
{
// map is from dna seq to a protein product
- cf.addMap(sequenceI, nxt, cands[r].getMap()
- .getMap());
+ cf.addMap(sequenceI, nxt, map);
}
else
{
// map should be from protein seq to its coding dna
- cf.addMap(nxt, sequenceI, cands[r].getMap()
- .getMap().getInverse());
+ cf.addMap(nxt, sequenceI, map.getInverse());
}
}
}
}
- // TODO: add mapping between sequences if necessary
- found = true;
}
+ // TODO: add mapping between sequences if necessary
}
-
}
}
}
}
return found;
}
-
- /**
- * precalculate different products that can be found for seqs in dataset and
- * return them.
- *
- * @param dna
- * @param seqs
- * @param dataset
- * @param fake
- * - don't actually build lists - just get types
- * @return public static Object[] buildXProductsList(boolean dna, SequenceI[]
- * seqs, AlignmentI dataset, boolean fake) { String types[] =
- * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,
- * dataset); if (types != null) { System.out.println("Xref Types for:
- * "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) {
- * System.out.println("Type: " + types[t]); SequenceI[] prod =
- * jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);
- * System.out.println("Found " + ((prod == null) ? "no" : "" +
- * prod.length) + " products"); if (prod!=null) { for (int p=0;
- * p<prod.length; p++) { System.out.println("Prod "+p+":
- * "+prod[p].getDisplayId(true)); } } } } else {
- * System.out.println("Trying getProducts for
- * "+al.getSequenceAt(0).getDisplayId(true));
- * System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot"));
- * // have a bash at finding the products amongst all the retrieved
- * sequences. SequenceI[] prod =
- * jalview.analysis.CrossRef.findXrefSequences(al
- * .getSequencesArray(), dna, null, ds); System.out.println("Found " +
- * ((prod == null) ? "no" : "" + prod.length) + " products"); if
- * (prod!=null) { // select non-equivalent sequences from dataset list
- * for (int p=0; p<prod.length; p++) { System.out.println("Prod "+p+":
- * "+prod[p].getDisplayId(true)); } } } }
- */
}
--- /dev/null
+package jalview.analysis;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcherFactory;
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+public class CrossRefs
+{
+ /**
+ * Finds cross-references for sequences from a specified source database.
+ * These may be found in four ways:
+ * <ul>
+ * <li>as a DBRefEntry on the known sequence, which has a mapped-to sequence</li>
+ * <li>a sequence of complementary type in the alignment dataset, which has a
+ * DBRefEntry to one of the known sequence's 'direct' DBRefs</li>
+ * <li>a sequence of complementary type in the alignment, which has a
+ * DBRefEntry to one of the known sequence's 'cross-ref' DBRefs</li>
+ * <li>by fetching the accession from the remote database</li>
+ * </ul>
+ *
+ * @param seqs
+ * the sequences whose cross-references we are searching for
+ * @param dna
+ * true if the sequences are from a nucleotide alignment, else false
+ * @param source
+ * the database source we want cross-references to
+ * @param dataset
+ * the alignment dataset the sequences belong to
+ * @return an alignment containing cross-reference sequences, or null if none
+ * found
+ */
+ public static AlignmentI findXrefSequences(SequenceI[] seqs, boolean dna,
+ String source, AlignmentI dataset)
+ {
+ List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
+ AlignedCodonFrame mappings = new AlignedCodonFrame();
+
+ List<DBRefEntry> sourceRefs = new ArrayList<DBRefEntry>();
+
+ for (SequenceI seq : seqs)
+ {
+ if (dna != Comparison.isNucleotide(seq))
+ {
+ /*
+ * mixed alignment, and this sequence is of the wrong type
+ */
+ continue;
+ }
+
+ /*
+ * get this sequence's dbrefs to source database (if any)
+ */
+ List<DBRefEntry> seqSourceRefs = DBRefUtils.searchRefsForSource(
+ seq.getDBRefs(), source);
+
+ /*
+ * first extract any mapped sequences from sourceRefs
+ */
+ findMappedDbrefs(seq, seqSourceRefs, foundSeqs, mappings);
+
+ /*
+ * for remaining sourceRefs, try to match a
+ * complementary sequence in the dataset
+ */
+ findIndirectCrossReferences(seq, source, seqSourceRefs, dataset,
+ foundSeqs, mappings);
+ }
+
+ /*
+ * fetch any remaining sourceRefs from the source database
+ */
+ fetchCrossReferences(sourceRefs, foundSeqs, mappings, dna, dataset);
+
+ if (foundSeqs.isEmpty())
+ {
+ return null;
+ }
+ AlignmentI crossRefs = new Alignment(
+ foundSeqs.toArray(new SequenceI[foundSeqs.size()]));
+ crossRefs.addCodonFrame(mappings);
+ return crossRefs;
+ }
+
+ /**
+ * Looks for DBRefEntrys to 'source' which have a mapping to a sequence. If
+ * found, adds the sequence to foundSeqs and removes the dbref from the list.
+ *
+ * @param seq
+ * the dataset sequence we are searching from
+ * @param sourceRefs
+ * the sequence's dbrefs to 'source'
+ * @param foundSeqs
+ * a list of cross-references to add to
+ * @param mappings
+ * a set of sequence mappings to add to
+ * @return
+ */
+ static void findMappedDbrefs(SequenceI seq, List<DBRefEntry> sourceRefs,
+ List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+ {
+ Iterator<DBRefEntry> refs = sourceRefs.iterator();
+ while (refs.hasNext())
+ {
+ DBRefEntry dbref = refs.next();
+ Mapping map = dbref.getMap();
+ if (map != null)
+ {
+ SequenceI mappedTo = map.getTo();
+ if (mappedTo != null)
+ {
+ foundSeqs.add(new Sequence(mappedTo));
+ refs.remove();
+
+ /*
+ * check mapping is not 'direct' (it shouldn't be if we reach here)
+ * and add mapping (dna-to-peptide or vice versa) to the set
+ */
+ MapList mapList = map.getMap();
+ int fromRatio = mapList.getFromRatio();
+ int toRatio = mapList.getToRatio();
+ if (fromRatio != toRatio)
+ {
+ if (fromRatio == 3)
+ {
+ mappings.addMap(seq, mappedTo, mapList);
+ }
+ else
+ {
+ mappings.addMap(mappedTo, seq, mapList.getInverse());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Tries to fetch seq's database references to 'source' database, and add them
+ * to the foundSeqs list. If found, tries to make a mapping between seq and
+ * the retrieved sequence and insert it into the database reference.
+ *
+ * @param seq
+ * @param sourceRefs
+ * @param foundSeqs
+ * @param mappings
+ * @param dna
+ */
+ static void fetchCrossReferences(SequenceI seq,
+ List<DBRefEntry> sourceRefs, List<SequenceI> foundSeqs,
+ AlignedCodonFrame mappings, boolean dna, AlignmentI dataset)
+ {
+ ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
+ SequenceI[] retrieved;
+ try
+ {
+ retrieved = sftch.getSequences(sourceRefs, !dna);
+ } catch (Exception e)
+ {
+ System.err
+ .println("Problem whilst retrieving cross references for Sequence : "
+ + seq.getName());
+ e.printStackTrace();
+ return;
+ }
+
+ if (retrieved != null)
+ {
+ updateDbrefMappings(dna, seq, sourceRefs, retrieved, mappings);
+
+ SequenceIdMatcher matcher = new SequenceIdMatcher(
+ dataset.getSequences());
+ List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
+ CrossRef me = new CrossRef();
+ for (int rs = 0; rs < retrieved.length; rs++)
+ {
+ // TODO: examine each sequence for 'redundancy'
+ DBRefEntry[] dbr = retrieved[rs].getDBRefs();
+ if (dbr != null && dbr.length > 0)
+ {
+ for (int di = 0; di < dbr.length; di++)
+ {
+ // find any entry where we should put in the sequence being
+ // cross-referenced into the map
+ Mapping map = dbr[di].getMap();
+ if (map != null)
+ {
+ if (map.getTo() != null && map.getMap() != null)
+ {
+ SequenceI matched = matcher.findIdMatch(map.getTo());
+ if (matched != null)
+ {
+ /*
+ * already got an xref to this sequence; update this
+ * map to point to the same sequence, and add
+ * any new dbrefs to it
+ */
+ for (DBRefEntry ref : map.getTo().getDBRefs())
+ {
+ matched.addDBRef(ref); // add or update mapping
+ }
+ map.setTo(matched);
+ }
+ else
+ {
+ matcher.add(map.getTo());
+ }
+ try
+ {
+ // compare ms with dss and replace with dss in mapping
+ // if map is congruent
+ SequenceI ms = map.getTo();
+ int sf = map.getMap().getToLowest();
+ int st = map.getMap().getToHighest();
+ SequenceI mappedrg = ms.getSubSequence(sf, st);
+ // SequenceI loc = dss.getSubSequence(sf, st);
+ if (mappedrg.getLength() > 0
+ && ms.getSequenceAsString().equals(
+ seq.getSequenceAsString()))
+ // && mappedrg.getSequenceAsString().equals(
+ // loc.getSequenceAsString()))
+ {
+ String msg = "Mapping updated from " + ms.getName()
+ + " to retrieved crossreference "
+ + seq.getName();
+ System.out.println(msg);
+ // method to update all refs of existing To on
+ // retrieved sequence with dss and merge any props
+ // on To onto dss.
+ map.setTo(seq);
+ /*
+ * copy sequence features as well, avoiding
+ * duplication (e.g. same variation from 2
+ * transcripts)
+ */
+ SequenceFeature[] sfs = ms.getSequenceFeatures();
+ if (sfs != null)
+ {
+ for (SequenceFeature feat : sfs)
+ {
+ /*
+ * we override SequenceFeature.equals here (but
+ * not elsewhere) to ignore Parent attribute
+ * TODO not quite working yet!
+ */
+ if (!copiedFeatures
+ .contains(me.new MySequenceFeature(feat)))
+ {
+ seq.addSequenceFeature(feat);
+ copiedFeatures.add(feat);
+ }
+ }
+ }
+ }
+ mappings.addMap(retrieved[rs].getDatasetSequence(),
+ map.getTo(), map.getMap());
+ } catch (Exception e)
+ {
+ System.err
+ .println("Exception when consolidating Mapped sequence set...");
+ e.printStackTrace(System.err);
+ }
+ }
+ }
+ }
+ }
+ retrieved[rs].updatePDBIds();
+ foundSeqs.add(retrieved[rs]);
+ }
+ }
+ }
+
+ /**
+ * Searches the alignment for a sequence of complementary type to 'seq' which
+ * shares a DBRefEntry with it. If found, adds the sequence to foundSeqs and
+ * removes the resolved sourceRef from the search list.
+ *
+ * @param seq
+ * @param source
+ * @param sourceRefs
+ * @param dataset
+ * @param foundSeqs
+ * @param mappings
+ * @return
+ */
+ static void findIndirectCrossReferences(SequenceI seq, String source,
+ List<DBRefEntry> sourceRefs, AlignmentI dataset,
+ List<SequenceI> foundSeqs, AlignedCodonFrame mappings)
+ {
+ Iterator<DBRefEntry> refs = sourceRefs.iterator();
+ while (refs.hasNext())
+ {
+ DBRefEntry dbref = refs.next();
+ boolean found = searchDatasetForCrossReference(seq, dbref, dataset,
+ foundSeqs, mappings);
+ if (found)
+ {
+ refs.remove();
+ }
+ }
+ }
+
+ /**
+ * Searches the dataset for a sequence of opposite type to 'excluding', which
+ * has a cross-reference matching dbref. If found, adds the sequence to
+ * foundSeqs and removes dbref from the search list.
+ *
+ * @param excluding
+ * a sequence to ignore (start point of search)
+ * @param dbref
+ * a cross-reference to try to match
+ * @param dataset
+ * sequences to search in
+ * @param foundSeqs
+ * result list to add to
+ * @param mappings
+ * a set of sequence mappings to add to
+ * @return true if relationship found and sequence added
+ */
+ static boolean searchDatasetForCrossReference(SequenceI excluding,
+ DBRefEntry dbref, AlignmentI dataset, List<SequenceI> foundSeqs,
+ AlignedCodonFrame mappings)
+ {
+ boolean fromNucleotide = Comparison.isNucleotide(excluding);
+ boolean found = false;
+ if (dataset == null)
+ {
+ return false;
+ }
+ if (dataset.getSequences() == null)
+ {
+ return false;
+ }
+ List<SequenceI> ds;
+ synchronized (ds = dataset.getSequences())
+ {
+ for (SequenceI nxt : ds)
+ {
+ if (nxt != null)
+ {
+ if (nxt.getDatasetSequence() != null)
+ {
+ System.err
+ .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
+ }
+ if (nxt == excluding || nxt == excluding.getDatasetSequence())
+ {
+ continue;
+ }
+ if (foundSeqs.contains(nxt))
+ {
+ /*
+ * already added this sequence to cross-refs
+ */
+ continue;
+ }
+ boolean isDna = Comparison.isNucleotide(nxt);
+ if (isDna == fromNucleotide)
+ {
+ /*
+ * skip this sequence - wrong molecule type
+ */
+ continue;
+ }
+
+ /*
+ * check if this sequence has any dbref matching source and accession
+ * (version and mapping may differ)
+ */
+ List<DBRefEntry> candidates = DBRefUtils.searchRefs(
+ nxt.getDBRefs(), dbref);
+
+ if (candidates.isEmpty())
+ {
+ continue;
+ }
+ found = true;
+ foundSeqs.add(nxt);
+ if (mappings != null)
+ {
+ // don't search if we aren't given a codon map object
+ for (DBRefEntry candidate : candidates)
+ {
+ if (candidate.hasMap())
+ {
+ Mapping mapping = candidate.getMap();
+ MapList map = mapping.getMap();
+ if (mapping.getTo() != null
+ && map.getFromRatio() != map.getToRatio())
+ {
+ if (fromNucleotide)
+ {
+ // map is from dna seq to a protein product
+ mappings.addMap(excluding, nxt, map);
+ }
+ else
+ {
+ // map is from protein seq to its coding dna
+ mappings.addMap(nxt, excluding, map.getInverse());
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return found;
+ }
+
+ /**
+ * Updates any empty mappings in the cross-references with one to a compatible
+ * retrieved sequence if found, and adds any new mappings to the
+ * AlignedCodonFrame
+ *
+ * @param dna
+ * @param mapFrom
+ * @param xrefs
+ * @param retrieved
+ * @param mappings
+ */
+ static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
+ List<DBRefEntry> xrefs, SequenceI[] retrieved,
+ AlignedCodonFrame mappings)
+ {
+ SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved);
+ for (DBRefEntry xref : xrefs)
+ {
+ if (!xref.hasMap())
+ {
+ String targetSeqName = xref.getSource() + "|"
+ + xref.getAccessionId();
+ SequenceI[] matches = matcher.findAllIdMatches(targetSeqName);
+ if (matches == null)
+ {
+ return;
+ }
+ for (SequenceI seq : matches)
+ {
+ MapList mapping = null;
+ if (dna)
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
+ }
+ else
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
+ if (mapping != null)
+ {
+ mapping = mapping.getInverse();
+ }
+ }
+ if (mapping != null)
+ {
+ xref.setMap(new Mapping(seq, mapping));
+ if (dna)
+ {
+ AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
+ }
+ if (dna)
+ {
+ mappings.addMap(mapFrom, seq, mapping);
+ }
+ else
+ {
+ mappings.addMap(seq, mapFrom, mapping.getInverse());
+ }
+ continue;
+ }
+ }
+ }
+ }
+ }
+}
import jalview.analysis.AlignmentSorter;
import jalview.analysis.AlignmentUtils;
import jalview.analysis.CrossRef;
+import jalview.analysis.CrossRefs;
import jalview.analysis.Dna;
import jalview.analysis.ParseProperties;
import jalview.analysis.SequenceIdMatcher;
{
showProducts.removeAll();
final boolean dna = viewport.getAlignment().isNucleotide();
- String[] ptypes = (selection == null || selection.length == 0) ? null
- : CrossRef.findSequenceXrefTypes(dna, selection, dataset);
+ List<String> ptypes = (selection == null || selection.length == 0) ? null
+ : CrossRef.findXrefSourcesForSequences(dna, selection, dataset);
- for (int t = 0; ptypes != null && t < ptypes.length; t++)
+ for (final String source : ptypes)
{
showp = true;
final AlignFrame af = this;
- final String source = ptypes[t];
- JMenuItem xtype = new JMenuItem(ptypes[t]);
+ JMenuItem xtype = new JMenuItem(source);
xtype.addActionListener(new ActionListener()
{
-
@Override
public void actionPerformed(ActionEvent e)
{
showProductsFor(af.viewport.getSequenceSelection(), dna, source);
}
-
});
showProducts.add(xtype);
}
showProducts.setEnabled(showp);
} catch (Exception e)
{
- jalview.bin.Cache.log
+ Cache.log
.warn("canShowProducts threw an exception - please report to help@jalview.org",
e);
return false;
{
AlignmentI alignment = AlignFrame.this.getViewport()
.getAlignment();
- AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source,
+ AlignmentI xrefs = CrossRefs.findXrefSequences(sel, dna, source,
alignment);
if (xrefs != null)
{
Cache.log.info(
"Error retrieving " + accession
+ " from " + proxy.getDbName(), e);
- } finally
- {
- return success;
}
+ return success;
}
/**
for (String q : queries)
{
- DBRefEntry[] found = null;
DBRefEntry dbr = new DBRefEntry();
dbr.setSource(proxy.getDbSource());
dbr.setVersion(null);
{
if (rs[r] != null)
{
- found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
- if (found != null && found.length > 0)
+ List<DBRefEntry> found = DBRefUtils.searchRefs(rs[r].getDBRefs(),
+ accId);
+ if (!found.isEmpty())
{
rfound = true;
break;
}
/**
+ * Overloaded method signature to test whether a single sequence is nucleotide
+ * (that is, more than 85% CGTA)
+ *
+ * @param seq
+ * @return
+ */
+ public static final boolean isNucleotide(SequenceI seq)
+ {
+ return isNucleotide(new SequenceI[] { seq });
+ }
+
+ /**
* Answers true if more than 85% of the sequence residues (ignoring gaps) are
* A, G, C, T or U, else false. This is just a heuristic guess and may give a
* wrong answer (as AGCT are also amino acid codes).
}
/**
+ * Returns those DBRefEntry objects whose source identifier (once converted to
+ * Jalview's canonical form) is in the list of sources to search for. Returns
+ * null if no matches found.
*
* @param dbrefs
- * array of DBRef objects to search
+ * DBRefEntry objects to search
* @param sources
- * String[] array of source DBRef IDs to retrieve
+ * array of sources to select
* @return
*/
public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
}
/**
- * Returns an array of those references that match the given entry, or null if
- * no matches. Currently uses a comparator which matches if
+ * Returns a (possibly empty) list of those references that match the given
+ * entry. Currently uses a comparator which matches if
* <ul>
* <li>database sources are the same</li>
* <li>accession ids are the same</li>
* pattern to match
* @return
*/
- public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry)
+ public static List<DBRefEntry> searchRefs(DBRefEntry[] ref,
+ DBRefEntry entry)
{
return searchRefs(ref, entry,
matchDbAndIdAndEitherMapOrEquivalentMapList);
}
/**
- * Returns an array of those references that match the given accession id
+ * Returns a list of those references that match the given accession id
* <ul>
* <li>database sources are the same</li>
* <li>accession ids are the same</li>
* <li>both have no mapping, or the mappings are the same</li>
* </ul>
*
- * @param ref
+ * @param refs
* Set of references to search
- * @param entry
- * pattern to match
+ * @param accId
+ * accession id to match
* @return
*/
- public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+ public static List<DBRefEntry> searchRefs(DBRefEntry[] refs, String accId)
{
- return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+ return searchRefs(refs, new DBRefEntry("", "", accId), matchId);
}
/**
- * Returns an array of those references that match the given entry, according
- * to the given comparator. Returns null if no matches.
+ * Returns a (possibly empty) list of those references that match the given
+ * entry, according to the given comparator.
*
* @param refs
* an array of database references to search
* @param comparator
* @return
*/
- static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry,
+ static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
DbRefComp comparator)
{
+ List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
if (refs == null || entry == null)
{
- return null;
+ return rfs;
}
- List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
for (int i = 0; i < refs.length; i++)
{
if (comparator.matches(entry, refs[i]))
rfs.add(refs[i]);
}
}
- return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]);
+ return rfs;
}
interface DbRefComp
};
/**
- * accession ID and DB must be identical. Version is ignored. No map on either
- * or map but no maplist on either or maplist of map on a is equivalent to the
- * maplist of map on b.
+ * accession ID and DB must be identical, or null on a. Version is ignored. No
+ * map on either or map but no maplist on either or maplist of map on a is
+ * equivalent to the maplist of map on b.
*/
public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
{
&& refb.getSource().equals(refa.getSource()))
{
// We dont care about version
- if (refa.getAccessionId() != null && refb.getAccessionId() != null
- && refb.getAccessionId().equals(refa.getAccessionId()))
+
+ if (refa.getAccessionId() == null
+ || refa.getAccessionId().equals(refb.getAccessionId()))
{
if (refa.getMap() == null || refb.getMap() == null)
{
|| (refb.getMap().getMap() != null
&& refa.getMap().getMap() != null && (refb
.getMap().getMap().equals(refa.getMap().getMap()))))
- { // getMap().getMap().containsEither(false,refa.getMap().getMap())
+ {
return true;
}
}
return (o1 == null ? o2.equals(o1) : o1.equals(o2));
}
+ /**
+ * Selects just the DNA or protein references from a set of references
+ *
+ * @param selectDna
+ * if true, select references to 'standard' DNA databases, else to
+ * 'standard' peptide databases
+ * @param refs
+ * a set of references to select from
+ * @return
+ */
+ public static DBRefEntry[] selectDbRefs(boolean selectDna,
+ DBRefEntry[] refs)
+ {
+ return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS
+ : DBRefSource.PROTEINDBS);
+ // could attempt to find other cross
+ // refs here - ie PDB xrefs
+ // (not dna, not protein seq)
+ }
+
+ /**
+ * Returns the (possibly empty) list of those supplied dbrefs which have the
+ * specified source databse
+ *
+ * @param dbRefs
+ * @param source
+ * @return
+ */
+ public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+ String source)
+ {
+ List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
+ if (dbRefs != null && source != null)
+ {
+ for (DBRefEntry dbref : dbRefs)
+ {
+ if (source.equals(dbref.getSource()))
+ {
+ matches.add(dbref);
+ }
+ }
+ }
+ return matches;
+ }
+
}
--- /dev/null
+package jalview.ws;
+
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+public class SequenceFetcherFactory
+{
+
+ private static SequenceFetcher instance;
+
+ /**
+ * Returns a new SequenceFetcher object, or a mock object if one has been set
+ *
+ * @return
+ */
+ public static ASequenceFetcher getSequenceFetcher()
+ {
+ return instance == null ? new SequenceFetcher() : instance;
+ }
+
+ /**
+ * Set the instance object to use (intended for unit testing with mock
+ * objects).
+ *
+ * Be sure to reset to null in the tearDown method of any tests!
+ *
+ * @param sf
+ */
+ public static void setSequenceFetcher(SequenceFetcher sf)
+ {
+ instance = sf;
+ }
+}
/**
* Constructor
*/
- public ASequenceFetcher()
+ protected ASequenceFetcher()
{
super();
* if true, only fetch from nucleotide data sources, else peptide
* @return
*/
- public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna)
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
{
Vector<SequenceI> rseqs = new Vector<SequenceI>();
Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
- for (int r = 0; r < refs.length; r++)
+ for (DBRefEntry ref : refs)
{
- if (!queries.containsKey(refs[r].getSource()))
+ if (!queries.containsKey(ref.getSource()))
{
- queries.put(refs[r].getSource(), new ArrayList<String>());
+ queries.put(ref.getSource(), new ArrayList<String>());
}
- List<String> qset = queries.get(refs[r].getSource());
- if (!qset.contains(refs[r].getAccessionId()))
+ List<String> qset = queries.get(ref.getSource());
+ if (!qset.contains(ref.getAccessionId()))
{
- qset.add(refs[r].getAccessionId());
+ qset.add(ref.getAccessionId());
}
}
Enumeration<String> e = queries.keys();
for (int is = 0; is < seqs.length; is++)
{
rseqs.addElement(seqs[is]);
- DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is]
+ List<DBRefEntry> frefs = DBRefUtils.searchRefs(seqs[is]
.getDBRefs(), new DBRefEntry(db, null, null));
- if (frefs != null)
+ for (DBRefEntry dbr : frefs)
{
- for (DBRefEntry dbr : frefs)
- {
- queriesFound.add(dbr.getAccessionId());
- queriesMade.remove(dbr.getAccessionId());
- }
+ queriesFound.add(dbr.getAccessionId());
+ queriesMade.remove(dbr.getAccessionId());
}
seqs[is] = null;
}
package jalview.analysis;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;
public class CrossRefTest
DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
+ // ENSEMBL is a source of either dna or protein sequence data
+ DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
- ref6, ref7, ref8 };
+ ref6, ref7, ref8, ref9 };
/*
* Just the DNA refs:
*/
- DBRefEntry[] found = CrossRef.findXDbRefs(false, refs);
- assertEquals(3, found.length);
+ DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
+ assertEquals(4, found.length);
assertSame(ref5, found[0]);
assertSame(ref6, found[1]);
assertSame(ref7, found[2]);
+ assertSame(ref9, found[3]);
/*
* Just the protein refs:
*/
- found = CrossRef.findXDbRefs(true, refs);
- assertEquals(4, found.length);
+ found = DBRefUtils.selectDbRefs(false, refs);
+ assertEquals(5, found.length);
assertSame(ref1, found[0]);
assertSame(ref2, found[1]);
assertSame(ref3, found[2]);
assertSame(ref4, found[3]);
+ assertSame(ref9, found[4]);
+ }
+
+ /**
+ * Test the method that finds a sequence's "product" xref source databases,
+ * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
+ * sequences which share a dbref with the sequence
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSourcesForSequence_proteinToDna()
+ {
+ SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
+ List<String> sources = new ArrayList<String>();
+ AlignmentI al = new Alignment(new SequenceI[] {});
+
+ /*
+ * first with no dbrefs to search
+ */
+ CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
+ assertTrue(sources.isEmpty());
+
+ /*
+ * add some dbrefs to sequence
+ */
+ // protein db is not a candidate for findXrefSources
+ seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ // dna coding databatases are
+ seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+ // a second EMBL xref should not result in a duplicate
+ seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
+ seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+ seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+ seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
+ seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
+ CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
+ assertEquals(4, sources.size());
+ assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]",
+ sources.toString());
+
+ /*
+ * add a sequence to the alignment which has a dbref to UNIPROT|A1234
+ * and others to dna coding databases
+ */
+ sources.clear();
+ seq.setDBRefs(null);
+ seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+ SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
+ seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+ seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+ // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
+ al.addSequence(seq2);
+ CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
+ assertEquals(3, sources.size());
+ assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the nucleotide sequence but on a peptide sequence in
+ * the alignment which which it shares a nucleotide dbref
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_indirectDbrefToProtein()
+ {
+ /*
+ * Alignment setup:
+ * - nucleotide dbref EMBL|AF039662
+ * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * Find UNIPROT xrefs for nucleotide
+ * - it has no UNIPROT dbref of its own
+ * - but peptide with matching nucleotide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ Alignment xrefs = CrossRef.findXrefSequences(
+ new SequenceI[] { emblSeq }, true, "UNIPROT", al);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(uniprotSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the peptide sequence but on a nucleotide sequence in
+ * the alignment which which it shares a protein dbref
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_indirectDbrefToNucleotide()
+ {
+ /*
+ * Alignment setup:
+ * - peptide dbref UNIPROT|Q9ZTS2
+ * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * find EMBL xrefs for peptide sequence - it has no direct
+ * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
+ */
+ /*
+ * Find EMBL xrefs for peptide
+ * - it has no EMBL dbref of its own
+ * - but nucleotide with matching peptide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ Alignment xrefs = CrossRef.findXrefSequences(
+ new SequenceI[] { uniprotSeq }, false, "EMBL", al);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(emblSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has no dbref to the desired source, and there are no indirect
+ * references via another sequence in the alignment
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_noDbrefs()
+ {
+ /*
+ * two nucleotide sequences, one with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
+
+ /*
+ * find UNIPROT xrefs for peptide sequence - it has no direct
+ * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
+ * equatable to it, so no results found
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
+ Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna2 },
+ true, "UNIPROT", al);
+ assertNull(xrefs);
+ }
+
+ /**
+ * Tests for the method that searches an alignment (with one sequence
+ * excluded) for protein/nucleotide sequences with a given cross-reference
+ */
+ @Test(groups = { "Functional" })
+ public void testSearchDataset()
+ {
+ /*
+ * nucleotide sequence with UNIPROT AND EMBL dbref
+ * peptide sequence with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
+
+ List<SequenceI> result = new ArrayList<SequenceI>();
+
+ /*
+ * first search for a dbref nowhere on the alignment:
+ */
+ DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
+ boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null,
+ true, true);
+ assertFalse(found);
+ assertTrue(result.isEmpty());
+
+ // TODO we are setting direct=true here but it is set to
+ // false in Jalview code...
+
+ /*
+ * search for a protein sequence with dbref UNIPROT:Q9ZTS2
+ */
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+ found = CrossRef.searchDataset(dna1, dbref, al, result, null, true,
+ true);
+ assertTrue(found);
+ assertEquals(1, result.size());
+ assertSame(pep1, result.get(0));
+
+ /*
+ * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
+ */
+ result.clear();
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+ found = CrossRef.searchDataset(pep1, dbref, al, result, null, true,
+ false);
+ assertTrue(found);
+ assertEquals(1, result.size());
+ assertSame(dna1, result.get(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with a mapping to a sequence
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_fromDbRefMap()
+ {
+ /*
+ * two peptide sequences each with a DBRef and SequenceFeature
+ */
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
+ pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+ pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
+ "group"));
+ SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+ pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+ pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
+ 12f, "group2"));
+
+ /*
+ * nucleotide sequence (to go in the alignment)
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+
+ /*
+ * add DBRefEntry's to dna1 with mappings from dna to both peptides
+ */
+ MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+ 3, 1);
+ Mapping map = new Mapping(pep1, mapList);
+ DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+ dna1.addDBRef(dbRef1);
+ mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
+ map = new Mapping(pep2, mapList);
+ DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
+ dna1.addDBRef(dbRef2);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence - it should pick up
+ * mapped sequences
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
+ true, "UNIPROT", al);
+ assertEquals(2, xrefs.getHeight());
+
+ /*
+ * cross-refs alignment holds copies of the mapped sequences
+ * including copies of their dbrefs and features
+ */
+ checkCopySequence(pep1, xrefs.getSequenceAt(0));
+ checkCopySequence(pep2, xrefs.getSequenceAt(1));
+ }
+
+ /**
+ * Helper method to assert seq1 looks like a copy of seq2
+ *
+ * @param seq1
+ * @param seq2
+ */
+ private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+ {
+ assertNotSame(seq1, seq2);
+ assertEquals(seq1.getName(), seq2.getName());
+ assertEquals(seq1.getStart(), seq2.getStart());
+ assertEquals(seq1.getEnd(), seq2.getEnd());
+ assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
+
+ /*
+ * compare dbrefs
+ */
+ assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
+ // check one to verify a copy, not the same object
+ if (seq1.getDBRefs().length > 0)
+ {
+ assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
+ }
+
+ /*
+ * compare features
+ */
+ assertArrayEquals(seq1.getSequenceFeatures(),
+ seq2.getSequenceFeatures());
+ if (seq1.getSequenceFeatures().length > 0)
+ {
+ assertNotSame(seq1.getSequenceFeatures()[0],
+ seq2.getSequenceFeatures()[0]);
+ }
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with no mapping, triggering a fetch from database
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_withFetch()
+ {
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+ final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+ final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+
+ SequenceFetcher mockFetcher = new SequenceFetcher()
+ {
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ return new SequenceI[] { pep1, pep2 };
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
+ true, "UNIPROT", al);
+ assertEquals(2, xrefs.getHeight());
+ assertSame(pep1, xrefs.getSequenceAt(0));
+ assertSame(pep2, xrefs.getSequenceAt(1));
+ }
+
+ @AfterClass
+ public void tearDown()
+ {
+ SequenceFetcherFactory.setSequenceFetcher(null);
}
}
--- /dev/null
+package jalview.analysis;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
+
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class CrossRefsTest
+{
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with a mapping to a sequence
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_fromDbRefMap()
+ {
+ /*
+ * two peptide sequences each with a DBRef and SequenceFeature
+ */
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
+ pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+ pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
+ "group"));
+ SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+ pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+ pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
+ 12f, "group2"));
+
+ /*
+ * nucleotide sequence (to go in the alignment)
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+
+ /*
+ * add DBRefEntry's to dna1 with mappings from dna to both peptides
+ */
+ MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+ 3, 1);
+ Mapping map = new Mapping(pep1, mapList);
+ DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+ dna1.addDBRef(dbRef1);
+ mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
+ map = new Mapping(pep2, mapList);
+ DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
+ dna1.addDBRef(dbRef2);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence - it should pick up
+ * mapped sequences
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ AlignmentI xrefs = CrossRefs.findXrefSequences(
+ new SequenceI[] { dna1 },
+ true, "UNIPROT", al);
+ assertEquals(2, xrefs.getHeight());
+
+ /*
+ * cross-refs alignment holds copies of the mapped sequences
+ * including copies of their dbrefs and features
+ */
+ checkCopySequence(pep1, xrefs.getSequenceAt(0));
+ checkCopySequence(pep2, xrefs.getSequenceAt(1));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the peptide sequence but on a nucleotide sequence in
+ * the alignment which which it shares a protein dbref
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_indirectDbrefToNucleotide()
+ {
+ /*
+ * Alignment setup:
+ * - peptide dbref UNIPROT|Q9ZTS2
+ * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * Find EMBL xrefs for peptide
+ * - it has no EMBL dbref of its own
+ * - but nucleotide with matching peptide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ AlignmentI xrefs = CrossRefs.findXrefSequences(
+ new SequenceI[] { uniprotSeq }, false, "EMBL", al);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(emblSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the nucleotide sequence but on a peptide sequence in
+ * the alignment which which it shares a nucleotide dbref
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_indirectDbrefToProtein()
+ {
+ /*
+ * Alignment setup:
+ * - nucleotide dbref EMBL|AF039662
+ * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * Find UNIPROT xrefs for nucleotide
+ * - it has no UNIPROT dbref of its own
+ * - but peptide with matching nucleotide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ AlignmentI xrefs = CrossRefs.findXrefSequences(
+ new SequenceI[] { emblSeq }, true, "UNIPROT", al);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(uniprotSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has no dbref to the desired source, and there are no indirect
+ * references via another sequence in the alignment
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_noDbrefs()
+ {
+ /*
+ * two nucleotide sequences, one with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
+
+ /*
+ * find UNIPROT xrefs for peptide sequence - it has no direct
+ * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
+ * equatable to it, so no results found
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
+ AlignmentI xrefs = CrossRefs.findXrefSequences(
+ new SequenceI[] { dna2 },
+ true, "UNIPROT", al);
+ assertNull(xrefs);
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with no mapping, triggering a fetch from database
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_withFetch()
+ {
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+ final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+ final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+
+ SequenceFetcher mockFetcher = new SequenceFetcher()
+ {
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ return new SequenceI[] { pep1, pep2 };
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ AlignmentI xrefs = CrossRefs.findXrefSequences(
+ new SequenceI[] { dna1 },
+ true, "UNIPROT", al);
+ assertEquals(2, xrefs.getHeight());
+ assertSame(pep1, xrefs.getSequenceAt(0));
+ assertSame(pep2, xrefs.getSequenceAt(1));
+ }
+
+ /**
+ * Helper method to assert seq1 looks like a copy of seq2
+ *
+ * @param seq1
+ * @param seq2
+ */
+ private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+ {
+ assertNotSame(seq1, seq2);
+ assertEquals(seq1.getName(), seq2.getName());
+ assertEquals(seq1.getStart(), seq2.getStart());
+ assertEquals(seq1.getEnd(), seq2.getEnd());
+ assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
+
+ /*
+ * compare dbrefs
+ */
+ assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
+ // check one to verify a copy, not the same object
+ if (seq1.getDBRefs().length > 0)
+ {
+ assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
+ }
+
+ /*
+ * compare features
+ */
+ assertArrayEquals(seq1.getSequenceFeatures(),
+ seq2.getSequenceFeatures());
+ if (seq1.getSequenceFeatures().length > 0)
+ {
+ assertNotSame(seq1.getSequenceFeatures()[0],
+ seq2.getSequenceFeatures()[0]);
+ }
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has two dbrefs with no mapping, triggering a fetch from database.
+ *
+ * @see http://issues.jalview.org/browse/JAL-2029
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_withFetchMultipleRefs()
+ {
+ /*
+ * EMBL|X07547 has a
+ */
+ SequenceI dna1 = new Sequence("X07547", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "B0BCM4"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P0CE20"));
+ final SequenceI pep1 = new Sequence("B0BCM4", "MGKGIL");
+ final SequenceI pep2 = new Sequence("P0CE20", "MGKGIL");
+
+ SequenceFetcher mockFetcher = new SequenceFetcher()
+ {
+ int call = 0;
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ // pending Mockito with its thenReturn(pep1).thenReturn(pep2) syntax!
+ return new SequenceI[] { call++ == 0 ? pep1 : pep2 };
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ AlignmentI xrefs = CrossRefs.findXrefSequences(
+ new SequenceI[] { dna1 },
+ true, "UNIPROT", al);
+ assertEquals(2, xrefs.getHeight());
+ assertSame(pep1, xrefs.getSequenceAt(0));
+ assertSame(pep2, xrefs.getSequenceAt(1));
+ }
+
+}
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
+import java.util.List;
+
import org.testng.annotations.Test;
public class DBRefUtilsTest
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
1 }, 1, 1)));
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
ref2, ref3, ref4, ref5 }, target);
- assertEquals(3, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
- assertSame(ref5, matches[2]);
+ assertEquals(3, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref5, matches.get(2));
}
/**
new int[] { 1, 1 }, 2, 2));
ref3.setMap(map3);
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
ref2, ref3 }, target);
- assertEquals(2, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
+ assertEquals(2, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
}
/**
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
1 }, 1, 1)));
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
- ref2, ref3, ref4, ref5 }, "A1234");
- assertEquals(3, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
- assertSame(ref5, matches[2]);
+ DBRefEntry[] dbrefs = new DBRefEntry[] { ref1,
+ ref2, ref3, ref4, ref5 };
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(dbrefs, "A1234");
+ assertEquals(3, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref5, matches.get(2));
+ }
+
+ /**
+ * Test the method that searches for matches references - case when we are
+ * matching a reference with null (any) accession id
+ */
+ @Test(groups = { "Functional" })
+ public void testSearchRefs_wildcardAccessionid()
+ {
+ DBRefEntry target = new DBRefEntry("EMBL", "2", null);
+
+ DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches
+ // constructor changes embl to EMBL
+ DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1235"); // matches
+ // constructor does not upper-case accession id
+ DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1236"); // matches
+ DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match
+ // ref5 matches although it has a mapping - ignored
+ DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1237");
+ ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+ 1 }, 1, 1)));
+
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
+ ref2, ref3, ref4, ref5 }, target);
+ assertEquals(4, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref3, matches.get(2));
+ assertSame(ref5, matches.get(3));
}
}
package jalview.ws;
+import jalview.analysis.CrossRef;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
{
boolean dna = sp.isDnaCoding();
// try and find products
- String types[] = jalview.analysis.CrossRef
- .findSequenceXrefTypes(dna, al.getSequencesArray());
+ List<String> types = CrossRef.findXrefSourcesForSequences(dna,
+ al.getSequencesArray(), null);
if (types != null)
{
System.out.println("Xref Types for: "
+ (dna ? "dna" : "prot"));
- for (int t = 0; t < types.length; t++)
+ for (String source : types)
{
- System.out.println("Type: " + types[t]);
+ System.out.println("Type: " + source);
SequenceI[] prod = jalview.analysis.CrossRef
.findXrefSequences(al.getSequencesArray(), dna,
- types[t], null)
+ source, null)
.getSequencesArray();
System.out.println("Found "
+ ((prod == null) ? "no" : "" + prod.length)