import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
*
* @param dna
* aligned dna sequences
- * @param mappings
- * from dna to protein
- * @param al
+ * @param dataset
+ * - throws error if not given a dataset
* @return an alignment whose sequences are the cds-only parts of the dna
* sequences (or null if no mappings are found)
*/
public static AlignmentI makeCdsAlignment(SequenceI[] dna,
- List<AlignedCodonFrame> mappings, AlignmentI al)
+ AlignmentI dataset)
{
+ if (dataset.getDataset() != null)
+ {
+ throw new Error(
+ "IMPLEMENTATION ERROR: dataset.getDataset() must be null!");
+ }
List<SequenceI> cdsSeqs = new ArrayList<SequenceI>();
+ List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
+
+ /*
+ * construct CDS sequences from the (cds-to-protein) mappings made earlier;
+ * this makes it possible to model multiple products from dna (e.g. EMBL);
+ * however it does mean we don't have the EMBL protein_id (a property on
+ * the CDS features) in order to make the CDS sequence name :-(
+ */
for (SequenceI seq : dna)
{
- AlignedCodonFrame cdsMappings = new AlignedCodonFrame();
+ SequenceI seqDss = seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence();
List<AlignedCodonFrame> seqMappings = MappingUtils
.findMappingsForSequence(seq, mappings);
- List<AlignedCodonFrame> alignmentMappings = al.getCodonFrames();
for (AlignedCodonFrame mapping : seqMappings)
{
- for (Mapping aMapping : mapping.getMappingsFromSequence(seq))
+ List<Mapping> mappingsFromSequence = mapping.getMappingsFromSequence(seq);
+
+ for (Mapping aMapping : mappingsFromSequence)
{
- SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(),
- aMapping);
+ if (aMapping.getMap().getFromRatio() == 1)
+ {
+ /*
+ * not a dna-to-protein mapping (likely dna-to-cds)
+ */
+ continue;
+ }
+
+ /*
+ * check for an existing CDS sequence i.e. a 3:1 mapping to
+ * the dna mapping's product
+ */
+ SequenceI cdsSeq = null;
+ // TODO better mappings collection data model so we can do
+ // a table lookup instead of double loops to find mappings
+ SequenceI proteinProduct = aMapping.getTo();
+ for (AlignedCodonFrame acf : MappingUtils
+ .findMappingsForSequence(proteinProduct, mappings))
+ {
+ for (SequenceToSequenceMapping map : acf.getMappings())
+ {
+ if (map.getMapping().getMap().getFromRatio() == 3
+ && proteinProduct == map.getMapping().getTo()
+ && seqDss != map.getFromSeq())
+ {
+ /*
+ * found a 3:1 mapping to the protein product which is not
+ * from the dna sequence...assume it is from the CDS sequence
+ * TODO mappings data model that brings together related
+ * dna-cds-protein mappings in one object
+ */
+ cdsSeq = map.getFromSeq();
+ }
+ }
+ }
+ if (cdsSeq != null)
+ {
+ /*
+ * mappings are always to dataset sequences so create an aligned
+ * sequence to own it; add the dataset sequence to the dataset
+ */
+ SequenceI derivedSequence = cdsSeq.deriveSequence();
+ cdsSeqs.add(derivedSequence);
+ if (!dataset.getSequences().contains(cdsSeq))
+ {
+ dataset.addSequence(cdsSeq);
+ }
+ continue;
+ }
+
+ /*
+ * didn't find mapped CDS sequence - construct it and add
+ * its dataset sequence to the dataset
+ */
+ cdsSeq = makeCdsSequence(seq.getDatasetSequence(), aMapping);
+ SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
cdsSeqs.add(cdsSeq);
-
+ if (!dataset.getSequences().contains(cdsSeqDss))
+ {
+ dataset.addSequence(cdsSeqDss);
+ }
+
/*
* add a mapping from CDS to the (unchanged) mapped to range
*/
MapList map = new MapList(cdsRange, aMapping.getMap()
.getToRanges(), aMapping.getMap().getFromRatio(),
aMapping.getMap().getToRatio());
- cdsMappings.addMap(cdsSeq, aMapping.getTo(), map);
+ AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+ cdsToProteinMapping.addMap(cdsSeq, proteinProduct, map);
+
+ /*
+ * guard against duplicating the mapping if repeating this action
+ */
+ if (!mappings.contains(cdsToProteinMapping))
+ {
+ mappings.add(cdsToProteinMapping);
+ }
/*
* add another mapping from original 'from' range to CDS
*/
+ AlignedCodonFrame dnaToProteinMapping = new AlignedCodonFrame();
map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1,
1);
- cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map);
+ dnaToProteinMapping.addMap(seq.getDatasetSequence(), cdsSeq, map);
+ if (!mappings.contains(dnaToProteinMapping))
+ {
+ mappings.add(dnaToProteinMapping);
+ }
- alignmentMappings.add(cdsMappings);
/*
* transfer any features on dna that overlap the CDS
}
}
- /*
- * add CDS seqs to shared dataset
- */
- Alignment dataset = al.getDataset();
- for (SequenceI seq : cdsSeqs)
- {
- if (!dataset.getSequences().contains(seq.getDatasetSequence()))
- {
- dataset.addSequence(seq.getDatasetSequence());
- }
- }
AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
.size()]));
- cds.setDataset(dataset);
+ cds.setDataset((Alignment) dataset);
return cds;
}
*
* @param seq
* @param mapping
- * @return
+ * @return CDS sequence (as a dataset sequence)
*/
static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
{
SequenceI newSeq = new Sequence(seq.getName() + "|"
+ mapping.getTo().getName(), newSeqChars, 1, newPos);
- newSeq.createDatasetSequence();
return newSeq;
}
* sort to get sequence features in start position order
* - would be better to store in Sequence as a TreeSet or NCList?
*/
- Arrays.sort(peptide.getSequenceFeatures(),
- new Comparator<SequenceFeature>()
- {
- @Override
- public int compare(SequenceFeature o1, SequenceFeature o2)
+ if (peptide.getSequenceFeatures() != null)
+ {
+ Arrays.sort(peptide.getSequenceFeatures(),
+ new Comparator<SequenceFeature>()
{
- int c = Integer.compare(o1.getBegin(), o2.getBegin());
- return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
- : c;
- }
- });
+ @Override
+ public int compare(SequenceFeature o1, SequenceFeature o2)
+ {
+ int c = Integer.compare(o1.getBegin(), o2.getBegin());
+ return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
+ : c;
+ }
+ });
+ }
return count;
}
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
-import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
import jalview.ws.seqfetcher.ASequenceFetcher;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
-import java.util.Vector;
/**
- * Functions for cross-referencing sequence databases. user must first specify
- * if cross-referencing from protein or dna (set dna==true)
+ * Functions for cross-referencing sequence databases.
*
* @author JimP
*
public class CrossRef
{
/*
- * A sub-class that ignores Parent attribute when comparing sequence
- * features. This avoids 'duplicate' CDS features that only
- * differ in their parent Transcript ids.
+ * the dataset of the alignment for which we are searching for
+ * cross-references; in some cases we may resolve xrefs by
+ * searching in the dataset
*/
- class MySequenceFeature extends SequenceFeature
- {
- private SequenceFeature feat;
+ private AlignmentI dataset;
- MySequenceFeature(SequenceFeature sf)
- {
- this.feat = sf;
- }
+ /*
+ * the sequences for which we are seeking cross-references
+ */
+ private SequenceI[] fromSeqs;
- @Override
- public boolean equals(Object o)
- {
- return feat.equals(o, true);
- }
- }
+ /**
+ * matcher built from dataset
+ */
+ SequenceIdMatcher matcher;
/**
- * Select just the DNA or protein references for a protein or dna sequence
- *
- * @param fromDna
- * if true, select references from DNA (i.e. Protein databases), else
- * DNA database references
- * @param refs
- * a set of references to select from
- * @return
+ * sequences found by cross-ref searches to fromSeqs
*/
- public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs)
- {
- return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS
- : DBRefSource.DNACODINGDBS);
- // could attempt to find other cross
- // refs here - ie PDB xrefs
- // (not dna, not protein seq)
- }
+ List<SequenceI> rseqs;
/**
- * @param dna
- * true if seqs are DNA seqs
+ * mappings constructed
+ */
+ AlignedCodonFrame cf;
+
+ /**
+ * Constructor
+ *
* @param seqs
- * @return a list of sequence database cross reference source types
+ * the sequences for which we are seeking cross-references
+ * @param ds
+ * the containing alignment dataset (may be searched to resolve
+ * cross-references)
*/
- public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs)
+ public CrossRef(SequenceI[] seqs, AlignmentI ds)
{
- return findSequenceXrefTypes(dna, seqs, null);
+ fromSeqs = seqs;
+ dataset = ds.getDataset() == null ? ds : ds.getDataset();
}
/**
- * Indirect references are references from other sequences from the dataset to
- * any of the direct DBRefEntrys on the given sequences.
+ * Returns a list of distinct database sources for which sequences have either
+ * <ul>
+ * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+ * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+ * reference from another sequence in the dataset which has a cross-reference
+ * to a direct DBRefEntry on the given sequence</li>
+ * </ul>
*
* @param dna
- * true if seqs are DNA seqs
- * @param seqs
- * @return a list of sequence database cross reference source types
+ * - when true, cross-references *from* dna returned. When false,
+ * cross-references *from* protein are returned
+ * @return
*/
- public static String[] findSequenceXrefTypes(boolean dna,
- SequenceI[] seqs, AlignmentI dataset)
+ public List<String> findXrefSourcesForSequences(boolean dna)
{
- String[] dbrefs = null;
- List<String> refs = new ArrayList<String>();
- for (SequenceI seq : seqs)
+ List<String> sources = new ArrayList<String>();
+ for (SequenceI seq : fromSeqs)
{
if (seq != null)
{
- SequenceI dss = seq;
- while (dss.getDatasetSequence() != null)
- {
- dss = dss.getDatasetSequence();
- }
- DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs());
- if (rfs != null)
- {
- for (DBRefEntry ref : rfs)
- {
- if (!refs.contains(ref.getSource()))
- {
- refs.add(ref.getSource());
- }
- }
- }
- if (dataset != null)
- {
- // search for references to this sequence's direct references.
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
- List<SequenceI> rseqs = new ArrayList<SequenceI>();
- CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
- null); // don't need to specify codon frame for mapping here
- for (SequenceI rs : rseqs)
- {
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs());
- if (xrs != null)
- {
- for (DBRefEntry ref : xrs)
- {
- if (!refs.contains(ref.getSource()))
- {
- refs.add(ref.getSource());
- }
- }
- }
- // looks like copy and paste - change rfs to xrs?
- // for (int r = 0; rfs != null && r < rfs.length; r++)
- // {
- // if (!refs.contains(rfs[r].getSource()))
- // {
- // refs.add(rfs[r].getSource());
- // }
- // }
- }
- }
+ findXrefSourcesForSequence(seq, dna, sources);
}
}
- if (refs.size() > 0)
- {
- dbrefs = new String[refs.size()];
- refs.toArray(dbrefs);
- }
- return dbrefs;
+ return sources;
}
- public static boolean hasCdnaMap(SequenceI[] seqs)
+ /**
+ * Returns a list of distinct database sources for which a sequence has either
+ * <ul>
+ * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+ * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+ * reference from another sequence in the dataset which has a cross-reference
+ * to a direct DBRefEntry on the given sequence</li>
+ * </ul>
+ *
+ * @param seq
+ * the sequence whose dbrefs we are searching against
+ * @param fromDna
+ * when true, context is DNA - so sources identifying protein
+ * products will be returned.
+ * @param sources
+ * a list of sources to add matches to
+ */
+ void findXrefSourcesForSequence(SequenceI seq, boolean fromDna,
+ List<String> sources)
{
- // TODO unused - remove?
- String[] reftypes = findSequenceXrefTypes(false, seqs);
- for (int s = 0; s < reftypes.length; s++)
+ /*
+ * first find seq's xrefs (dna-to-peptide or peptide-to-dna)
+ */
+ DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs());
+ addXrefsToSources(rfs, sources);
+ if (dataset != null)
{
- if (reftypes.equals(DBRefSource.EMBLCDS))
+ /*
+ * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
+ */
+ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs());
+ List<SequenceI> rseqs = new ArrayList<SequenceI>();
+
+ /*
+ * find sequences in the alignment which xref one of these DBRefs
+ * i.e. is xref-ed to a common sequence identifier
+ */
+ searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null);
+
+ /*
+ * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources
+ */
+ for (SequenceI rs : rseqs)
{
- return true;
- // no map
+ DBRefEntry[] xrs = DBRefUtils
+ .selectDbRefs(!fromDna, rs.getDBRefs());
+ addXrefsToSources(xrs, sources);
}
}
- return false;
}
- public static SequenceI[] getCdnaMap(SequenceI[] seqs)
+ /**
+ * Helper method that adds the source identifiers of some cross-references to
+ * a (non-redundant) list of database sources
+ *
+ * @param xrefs
+ * @param sources
+ */
+ void addXrefsToSources(DBRefEntry[] xrefs, List<String> sources)
{
- // TODO unused - remove?
- Vector cseqs = new Vector();
- for (int s = 0; s < seqs.length; s++)
+ if (xrefs != null)
{
- DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRefs());
- for (int c = 0; c < cdna.length; c++)
+ for (DBRefEntry ref : xrefs)
{
- if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
+ /*
+ * avoid duplication e.g. ENSEMBL and Ensembl
+ */
+ String source = DBRefUtils.getCanonicalName(ref.getSource());
+ if (!sources.contains(source))
{
- System.err
- .println("TODO: unimplemented sequence retrieval for coding region sequence.");
- // TODO: retrieve CDS dataset sequences
- // need global dataset sequence retriever/resolver to reuse refs
- // and construct Mapping entry.
- // insert gaps in CDS according to peptide gaps.
- // add gapped sequence to cseqs
+ sources.add(source);
}
}
}
- if (cseqs.size() > 0)
- {
- SequenceI[] rsqs = new SequenceI[cseqs.size()];
- cseqs.copyInto(rsqs);
- return rsqs;
- }
- return null;
-
}
/**
+ * Attempts to find cross-references from the sequences provided in the
+ * constructor to the given source database. Cross-references may be found
+ * <ul>
+ * <li>in dbrefs on the sequence which hold a mapping to a sequence
+ * <ul>
+ * <li>provided with a fetched sequence (e.g. ENA translation), or</li>
+ * <li>populated previously after getting cross-references</li>
+ * </ul>
+ * <li>as other sequences in the alignment which share a dbref identifier with
+ * the sequence</li>
+ * <li>by fetching from the remote database</li>
+ * </ul>
+ * The cross-referenced sequences, and mappings to them, are added to the
+ * alignment dataset.
*
- * @param seqs
- * sequences whose xrefs are being retrieved
- * @param dna
- * true if sequences are nucleotide
* @param source
- * @param al
- * alignment to search for cross-referenced sequences (and possibly
- * add to)
- * @return products (as dataset sequences)
+ * @return cross-referenced sequences (as dataset sequences)
*/
- public static Alignment findXrefSequences(SequenceI[] seqs,
- final boolean dna, final String source, AlignmentI al)
+ public Alignment findXrefSequences(String source, boolean fromDna)
{
- AlignmentI dataset = al.getDataset() == null ? al : al.getDataset();
- List<SequenceI> rseqs = new ArrayList<SequenceI>();
- AlignedCodonFrame cf = new AlignedCodonFrame();
- for (SequenceI seq : seqs)
+
+ rseqs = new ArrayList<SequenceI>();
+ cf = new AlignedCodonFrame();
+ matcher = new SequenceIdMatcher(
+ dataset.getSequences());
+
+ for (SequenceI seq : fromSeqs)
{
SequenceI dss = seq;
while (dss.getDatasetSequence() != null)
dss = dss.getDatasetSequence();
}
boolean found = false;
- DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRefs());
+ DBRefEntry[] xrfs = DBRefUtils
+ .selectDbRefs(!fromDna, dss.getDBRefs());
if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
- System.out.println("Attempting to find ds Xrefs refs.");
- // FIXME should be dss not seq here?
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
- // less ambiguous would be a 'find primary dbRefEntry' method.
- // filter for desired source xref here
- found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
- rseqs, cf);
+ /*
+ * found no suitable dbrefs on sequence - look for sequences in the
+ * alignment which share a dbref with this one
+ */
+ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna,
+ seq.getDBRefs());
+
+ /*
+ * find sequences (except this one!), of complementary type,
+ * which have a dbref to an accession id for this sequence,
+ * and add them to the results
+ */
+ found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf);
}
- for (int r = 0; xrfs != null && r < xrfs.length; r++)
+ if (xrfs == null && !found)
{
- DBRefEntry xref = xrfs[r];
- if (source != null && !source.equals(xref.getSource()))
- {
- continue;
- }
+ /*
+ * no dbref to source on this sequence or matched
+ * complementary sequence in the dataset
+ */
+ continue;
+ }
+ List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs,
+ source);
+ Iterator<DBRefEntry> refIterator = sourceRefs.iterator();
+ while (refIterator.hasNext())
+ {
+ DBRefEntry xref = refIterator.next();
+ found = false;
if (xref.hasMap())
{
- if (xref.getMap().getTo() != null)
+ SequenceI mappedTo = xref.getMap().getTo();
+ if (mappedTo != null)
{
- SequenceI rsq = new Sequence(xref.getMap().getTo());
+ /*
+ * dbref contains the sequence it maps to; add it to the
+ * results unless we have done so already (could happen if
+ * fetching xrefs for sequences which have xrefs in common)
+ * for example: UNIPROT {P0CE19, P0CE20} -> EMBL {J03321, X06707}
+ */
+ found = true;
+ /*
+ * problem: matcher.findIdMatch() is lenient - returns a sequence
+ * with a dbref to the search arg e.g. ENST for ENSP - wrong
+ * but findInDataset() matches ENSP when looking for Uniprot...
+ */
+ SequenceI matchInDataset = findInDataset(xref);
+ /*matcher.findIdMatch(mappedTo);*/
+ if (matchInDataset != null)
+ {
+ if (!rseqs.contains(matchInDataset))
+ {
+ rseqs.add(matchInDataset);
+ }
+ refIterator.remove();
+ continue;
+ }
+ SequenceI rsq = new Sequence(mappedTo);
rseqs.add(rsq);
- if (xref.getMap().getMap().getFromRatio() != xref
- .getMap().getMap().getToRatio())
+ if (xref.getMap().getMap().getFromRatio() != xref.getMap()
+ .getMap().getToRatio())
{
// get sense of map correct for adding to product alignment.
- if (dna)
+ if (fromDna)
{
// map is from dna seq to a protein product
cf.addMap(dss, rsq, xref.getMap().getMap());
cf.addMap(rsq, dss, xref.getMap().getMap().getInverse());
}
}
- found = true;
}
}
+
if (!found)
{
- // do a bit more work - search for sequences with references matching
- // xrefs on this sequence.
- if (dataset != null)
+ SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|"
+ + xref.getAccessionId());
+ if (matchedSeq != null)
{
- found |= searchDataset(dss, xref, dataset, rseqs, cf, false,
- !dna);
- if (found)
+ if (constructMapping(seq, matchedSeq, xref, cf, fromDna))
{
- xrfs[r] = null; // we've recovered seqs for this one.
+ found = true;
}
}
}
+
+ if (!found)
+ {
+ // do a bit more work - search for sequences with references matching
+ // xrefs on this sequence.
+ found = searchDataset(fromDna, dss, xref, rseqs, cf, false);
+ }
+ if (found)
+ {
+ refIterator.remove();
+ }
+ }
+
+ /*
+ * fetch from source database any dbrefs we haven't resolved up to here
+ */
+ if (!sourceRefs.isEmpty())
+ {
+ retrieveCrossRef(sourceRefs, seq, xrfs, fromDna);
}
- if (!found)
+ }
+
+ Alignment ral = null;
+ if (rseqs.size() > 0)
+ {
+ ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
+ if (!cf.isEmpty())
{
- if (xrfs != null && xrfs.length > 0)
+ dataset.addCodonFrame(cf);
+ }
+ }
+ return ral;
+ }
+
+ private void retrieveCrossRef(List<DBRefEntry> sourceRefs, SequenceI seq,
+ DBRefEntry[] xrfs, boolean fromDna)
+ {
+ ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
+ SequenceI[] retrieved = null;
+ SequenceI dss = null;
+ try
+ {
+ retrieved = sftch.getSequences(sourceRefs, !fromDna);
+ } catch (Exception e)
+ {
+ System.err
+ .println("Problem whilst retrieving cross references for Sequence : "
+ + seq.getName());
+ e.printStackTrace();
+ }
+
+ if (retrieved != null)
+ {
+ updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna);
+ for (SequenceI retrievedSequence : retrieved)
+ {
+ // dataset gets contaminated ccwith non-ds sequences. why ??!
+ // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
+ SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence
+ : retrievedSequence.getDatasetSequence();
+ DBRefEntry[] dbr = retrievedSequence.getDBRefs();
+ if (dbr != null)
{
- // Try and get the sequence reference...
- /*
- * Ideal world - we ask for a sequence fetcher implementation here if
- * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) (
- */
- ASequenceFetcher sftch = new SequenceFetcher();
- SequenceI[] retrieved = null;
- int l = xrfs.length;
- for (int r = 0; r < xrfs.length; r++)
+ for (DBRefEntry dbref : dbr)
{
- // filter out any irrelevant or irretrievable references
- if (xrfs[r] == null
- || ((source != null && !source.equals(xrfs[r]
- .getSource())) || !sftch.isFetchable(xrfs[r]
- .getSource())))
+ // find any entry where we should put in the sequence being
+ // cross-referenced into the map
+ Mapping map = dbref.getMap();
+ if (map != null)
{
- l--;
- xrfs[r] = null;
- }
- }
- if (l > 0)
- {
- // System.out
- // .println("Attempting to retrieve cross referenced sequences.");
- DBRefEntry[] t = new DBRefEntry[l];
- l = 0;
- for (int r = 0; r < xrfs.length; r++)
- {
- if (xrfs[r] != null)
- {
- t[l++] = xrfs[r];
- }
- }
- xrfs = t;
- try
- {
- retrieved = sftch.getSequences(xrfs, !dna);
- // problem here is we don't know which of xrfs resulted in which
- // retrieved element
- } catch (Exception e)
- {
- System.err
- .println("Problem whilst retrieving cross references for Sequence : "
- + seq.getName());
- e.printStackTrace();
- }
-
- if (retrieved != null)
- {
- updateDbrefMappings(dna, seq, xrfs, retrieved, cf);
-
- SequenceIdMatcher matcher = new SequenceIdMatcher(
- dataset.getSequences());
- List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
- CrossRef me = new CrossRef();
- for (int rs = 0; rs < retrieved.length; rs++)
+ if (map.getTo() != null && map.getMap() != null)
{
- // TODO: examine each sequence for 'redundancy'
- DBRefEntry[] dbr = retrieved[rs].getDBRefs();
- if (dbr != null && dbr.length > 0)
+ // TODO findInDataset requires exact sequence match but
+ // 'congruent' test is only for the mapped part
+ // maybe not a problem in practice since only ENA provide a
+ // mapping and it is to the full protein translation of CDS
+ SequenceI matched = findInDataset(dbref);
+ // matcher.findIdMatch(map.getTo());
+ if (matched != null)
{
- for (int di = 0; di < dbr.length; di++)
+ /*
+ * already got an xref to this sequence; update this
+ * map to point to the same sequence, and add
+ * any new dbrefs to it
+ */
+ DBRefEntry[] toRefs = map.getTo().getDBRefs();
+ if (toRefs != null)
{
- // find any entry where we should put in the sequence being
- // cross-referenced into the map
- Mapping map = dbr[di].getMap();
- if (map != null)
+ for (DBRefEntry ref : toRefs)
{
- if (map.getTo() != null && map.getMap() != null)
+ matched.addDBRef(ref); // add or update mapping
+ }
+ }
+ map.setTo(matched);
+ }
+ else
+ {
+ matcher.add(map.getTo());
+ }
+ try
+ {
+ // compare ms with dss and replace with dss in mapping
+ // if map is congruent
+ SequenceI ms = map.getTo();
+ int sf = map.getMap().getToLowest();
+ int st = map.getMap().getToHighest();
+ SequenceI mappedrg = ms.getSubSequence(sf, st);
+ // SequenceI loc = dss.getSubSequence(sf, st);
+ if (mappedrg.getLength() > 0
+ && ms.getSequenceAsString().equals(
+ dss.getSequenceAsString()))
+ // && mappedrg.getSequenceAsString().equals(
+ // loc.getSequenceAsString()))
+ {
+ String msg = "Mapping updated from " + ms.getName()
+ + " to retrieved crossreference "
+ + dss.getName();
+ System.out.println(msg);
+ map.setTo(dss);
+
+ /*
+ * give the reverse reference the inverse mapping
+ * (if it doesn't have one already)
+ */
+ setReverseMapping(dss, dbref, cf);
+
+ /*
+ * copy sequence features as well, avoiding
+ * duplication (e.g. same variation from two
+ * transcripts)
+ */
+ SequenceFeature[] sfs = ms.getSequenceFeatures();
+ if (sfs != null)
+ {
+ for (SequenceFeature feat : sfs)
{
- SequenceI matched = matcher
- .findIdMatch(map.getTo());
- if (matched != null)
- {
- /*
- * already got an xref to this sequence; update this
- * map to point to the same sequence, and add
- * any new dbrefs to it
- */
- for (DBRefEntry ref : map.getTo().getDBRefs())
- {
- matched.addDBRef(ref); // add or update mapping
- }
- map.setTo(matched);
- }
- else
- {
- matcher.add(map.getTo());
- }
- try
+ /*
+ * make a flyweight feature object which ignores Parent
+ * attribute in equality test; this avoids creating many
+ * otherwise duplicate exon features on genomic sequence
+ */
+ SequenceFeature newFeature = new SequenceFeature(
+ feat)
{
- // compare ms with dss and replace with dss in mapping
- // if map is congruent
- SequenceI ms = map.getTo();
- int sf = map.getMap().getToLowest();
- int st = map.getMap().getToHighest();
- SequenceI mappedrg = ms.getSubSequence(sf, st);
- // SequenceI loc = dss.getSubSequence(sf, st);
- if (mappedrg.getLength() > 0
- && ms.getSequenceAsString().equals(
- dss.getSequenceAsString()))
- // && mappedrg.getSequenceAsString().equals(
- // loc.getSequenceAsString()))
- {
- String msg = "Mapping updated from "
- + ms.getName()
- + " to retrieved crossreference "
- + dss.getName();
- System.out.println(msg);
- // method to update all refs of existing To on
- // retrieved sequence with dss and merge any props
- // on To onto dss.
- map.setTo(dss);
- /*
- * copy sequence features as well, avoiding
- * duplication (e.g. same variation from 2
- * transcripts)
- */
- SequenceFeature[] sfs = ms
- .getSequenceFeatures();
- if (sfs != null)
- {
- for (SequenceFeature feat : sfs)
- {
- /*
- * we override SequenceFeature.equals here (but
- * not elsewhere) to ignore Parent attribute
- * TODO not quite working yet!
- */
- if (!copiedFeatures
- .contains(me.new MySequenceFeature(
- feat)))
- {
- dss.addSequenceFeature(feat);
- copiedFeatures.add(feat);
- }
- }
- }
- cf.addMap(retrieved[rs].getDatasetSequence(),
- dss, map.getMap());
- }
- else
+ @Override
+ public boolean equals(Object o)
{
- cf.addMap(retrieved[rs].getDatasetSequence(),
- map.getTo(), map.getMap());
+ return super.equals(o, true);
}
- } catch (Exception e)
- {
- System.err
- .println("Exception when consolidating Mapped sequence set...");
- e.printStackTrace(System.err);
- }
+ };
+ dss.addSequenceFeature(newFeature);
}
}
}
+ cf.addMap(retrievedDss, map.getTo(), map.getMap());
+ } catch (Exception e)
+ {
+ System.err
+ .println("Exception when consolidating Mapped sequence set...");
+ e.printStackTrace(System.err);
}
- retrieved[rs].updatePDBIds();
- rseqs.add(retrieved[rs]);
}
}
}
}
+ retrievedSequence.updatePDBIds();
+ rseqs.add(retrievedDss);
+ dataset.addSequence(retrievedDss);
+ matcher.add(retrievedDss);
+ }
+ }
+ }
+ /**
+ * Sets the inverse sequence mapping in the corresponding dbref of the mapped
+ * to sequence (if any). This is used after fetching a cross-referenced
+ * sequence, if the fetched sequence has a mapping to the original sequence,
+ * to set the mapping in the original sequence's dbref.
+ *
+ * @param mapFrom
+ * the sequence mapped from
+ * @param dbref
+ * @param mappings
+ */
+ void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref,
+ AlignedCodonFrame mappings)
+ {
+ SequenceI mapTo = dbref.getMap().getTo();
+ if (mapTo == null)
+ {
+ return;
+ }
+ DBRefEntry[] dbrefs = mapTo.getDBRefs();
+ if (dbrefs == null)
+ {
+ return;
+ }
+ for (DBRefEntry toRef : dbrefs)
+ {
+ if (toRef.hasMap() && mapFrom == toRef.getMap().getTo())
+ {
+ /*
+ * found the reverse dbref; update its mapping if null
+ */
+ if (toRef.getMap().getMap() == null)
+ {
+ MapList inverse = dbref.getMap().getMap().getInverse();
+ toRef.getMap().setMap(inverse);
+ mappings.addMap(mapTo, mapFrom, inverse);
+ }
}
}
+ }
- Alignment ral = null;
- if (rseqs.size() > 0)
+ /**
+ * Returns the first identical sequence in the dataset if any, else null
+ *
+ * @param xref
+ * @return
+ */
+ SequenceI findInDataset(DBRefEntry xref)
+ {
+ if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null)
{
- ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
- if (cf != null && !cf.isEmpty())
+ return null;
+ }
+ SequenceI mapsTo = xref.getMap().getTo();
+ String name = xref.getAccessionId();
+ String name2 = xref.getSource() + "|" + name;
+ SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo
+ .getDatasetSequence();
+ for (SequenceI seq : dataset.getSequences())
+ {
+ /*
+ * clumsy alternative to using SequenceIdMatcher which currently
+ * returns sequences with a dbref to the matched accession id
+ * which we don't want
+ */
+ if (name.equals(seq.getName()) || seq.getName().startsWith(name2))
{
- ral.addCodonFrame(cf);
+ if (sameSequence(seq, dss))
+ {
+ return seq;
+ }
}
}
- return ral;
+ return null;
+ }
+
+ /**
+ * Answers true if seq1 and seq2 contain exactly the same characters (ignoring
+ * case), else false. This method compares the lengths, then each character in
+ * turn, in order to 'fail fast'. For case-sensitive comparison, it would be
+ * possible to use Arrays.equals(seq1.getSequence(), seq2.getSequence()).
+ *
+ * @param seq1
+ * @param seq2
+ * @return
+ */
+ // TODO move to Sequence / SequenceI
+ static boolean sameSequence(SequenceI seq1, SequenceI seq2)
+ {
+ if (seq1 == seq2)
+ {
+ return true;
+ }
+ if (seq1 == null || seq2 == null)
+ {
+ return false;
+ }
+ char[] c1 = seq1.getSequence();
+ char[] c2 = seq2.getSequence();
+ if (c1.length != c2.length)
+ {
+ return false;
+ }
+ for (int i = 0; i < c1.length; i++)
+ {
+ int diff = c1[i] - c2[i];
+ /*
+ * same char or differ in case only ('a'-'A' == 32)
+ */
+ if (diff != 0 && diff != 32 && diff != -32)
+ {
+ return false;
+ }
+ }
+ return true;
}
/**
* retrieved sequence if found, and adds any new mappings to the
* AlignedCodonFrame
*
- * @param dna
* @param mapFrom
* @param xrefs
* @param retrieved
* @param acf
*/
- static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
- DBRefEntry[] xrefs, SequenceI[] retrieved, AlignedCodonFrame acf)
+ void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs,
+ SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna)
{
SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved);
for (DBRefEntry xref : xrefs)
}
for (SequenceI seq : matches)
{
- MapList mapping = null;
- if (dna)
- {
- mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
- }
- else
- {
- mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
- if (mapping != null)
- {
- mapping = mapping.getInverse();
- }
- }
- if (mapping != null)
- {
- xref.setMap(new Mapping(seq, mapping));
- if (dna)
- {
- AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
- }
- if (dna)
- {
- acf.addMap(mapFrom, seq, mapping);
- }
- else
- {
- acf.addMap(seq, mapFrom, mapping.getInverse());
- }
- continue;
- }
+ constructMapping(mapFrom, seq, xref, acf, fromDna);
}
}
}
}
/**
+ * Tries to make a mapping between sequences. If successful, adds the mapping
+ * to the dbref and the mappings collection and answers true, otherwise
+ * answers false. The following methods of making are mapping are tried in
+ * turn:
+ * <ul>
+ * <li>if 'mapTo' holds a mapping to 'mapFrom', take the inverse; this is, for
+ * example, the case after fetching EMBL cross-references for a Uniprot
+ * sequence</li>
+ * <li>else check if the dna translates exactly to the protein (give or take
+ * start and stop codons></li>
+ * <li>else try to map based on CDS features on the dna sequence</li>
+ * </ul>
+ *
+ * @param mapFrom
+ * @param mapTo
+ * @param xref
+ * @param mappings
+ * @return
+ */
+ boolean constructMapping(SequenceI mapFrom, SequenceI mapTo,
+ DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna)
+ {
+ MapList mapping = null;
+
+ /*
+ * look for a reverse mapping, if found make its inverse
+ */
+ if (mapTo.getDBRefs() != null)
+ {
+ for (DBRefEntry dbref : mapTo.getDBRefs())
+ {
+ String name = dbref.getSource() + "|" + dbref.getAccessionId();
+ if (dbref.hasMap() && mapFrom.getName().startsWith(name))
+ {
+ /*
+ * looks like we've found a map from 'mapTo' to 'mapFrom'
+ * - invert it to make the mapping the other way
+ */
+ MapList reverse = dbref.getMap().getMap().getInverse();
+ xref.setMap(new Mapping(mapTo, reverse));
+ mappings.addMap(mapFrom, mapTo, reverse);
+ return true;
+ }
+ }
+ }
+
+ if (fromDna)
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom);
+ }
+ else
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo);
+ if (mapping != null)
+ {
+ mapping = mapping.getInverse();
+ }
+ }
+ if (mapping == null)
+ {
+ return false;
+ }
+ xref.setMap(new Mapping(mapTo, mapping));
+ if (fromDna)
+ {
+ AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping);
+ mappings.addMap(mapFrom, mapTo, mapping);
+ }
+ else
+ {
+ mappings.addMap(mapTo, mapFrom, mapping.getInverse());
+ }
+
+ return true;
+ }
+
+ /**
* find references to lrfs in the cross-reference set of each sequence in
* dataset (that is not equal to sequenceI) Identifies matching DBRefEntry
* based on source and accession string only - Map and Version are nulled.
*
+ * @param fromDna
+ * - true if context was searching from Dna sequences, false if
+ * context was searching from Protein sequences
* @param sequenceI
* @param lrfs
- * @param dataset
* @param rseqs
* @return true if matches were found.
*/
- private static boolean searchDatasetXrefs(SequenceI sequenceI,
- boolean dna, DBRefEntry[] lrfs, AlignmentI dataset,
- List<SequenceI> rseqs, AlignedCodonFrame cf)
+ private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,
+ DBRefEntry[] lrfs, List<SequenceI> rseqs, AlignedCodonFrame cf)
{
boolean found = false;
if (lrfs == null)
// add in wildcards
xref.setVersion(null);
xref.setMap(null);
- found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna);
+ found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false);
}
return found;
}
/**
- * search a given sequence dataset for references matching cross-references to
- * the given sequence
+ * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the
+ * associated sequence to rseqs
*
+ * @param fromDna
+ * true if context was searching for refs *from* dna sequence, false
+ * if context was searching for refs *from* protein sequence
* @param sequenceI
+ * a sequence to ignore (start point of search)
* @param xrf
- * @param dataset
+ * a cross-reference to try to match
* @param rseqs
- * set of unique sequences
+ * result list to add to
* @param cf
- * @return true if one or more unique sequences were found and added
- */
- public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
- AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf)
- {
- return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);
- }
-
- /**
- * TODO: generalise to different protein classifications Search dataset for
- * DBRefEntrys matching the given one (xrf) and add the associated sequence to
- * rseq.
- *
- * @param sequenceI
- * @param xrf
- * @param dataset
- * @param rseqs
+ * a set of sequence mappings to add to
* @param direct
- * - search all references or only subset
- * @param dna
- * search dna or protein xrefs (if direct=false)
+ * - indicates the type of relationship between returned sequences,
+ * xrf, and sequenceI that is required.
+ * <ul>
+ * <li>direct implies xrf is a primary reference for sequenceI AND
+ * the sequences to be located (eg a uniprot ID for a protein
+ * sequence, and a uniprot ref on a transcript sequence).</li>
+ * <li>indirect means xrf is a cross reference with respect to
+ * sequenceI or all the returned sequences (eg a genomic reference
+ * associated with a locus and one or more transcripts)</li>
+ * </ul>
* @return true if relationship found and sequence added.
*/
- public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
- AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf,
- boolean direct, boolean dna)
+ boolean searchDataset(boolean fromDna, SequenceI sequenceI,
+ DBRefEntry xrf, List<SequenceI> rseqs, AlignedCodonFrame cf,
+ boolean direct)
{
boolean found = false;
- SequenceI[] typer = new SequenceI[1];
if (dataset == null)
{
return false;
if (nxt.getDatasetSequence() != null)
{
System.err
- .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
+ .println("Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! ("
+ + nxt.getDisplayId(true)
+ + " has ds reference "
+ + nxt.getDatasetSequence().getDisplayId(true)
+ + ")");
+ }
+ if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence())
+ {
+ continue;
}
- if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())
+ /*
+ * only look at same molecule type if 'direct', or
+ * complementary type if !direct
+ */
{
- // check if this is the correct sequence type
+ boolean isDna = !nxt.isProtein();
+ if (direct ? (isDna != fromDna) : (isDna == fromDna))
{
- typer[0] = nxt;
- boolean isDna = jalview.util.Comparison.isNucleotide(typer);
- if ((direct && isDna == dna) || (!direct && isDna != dna))
- {
- // skip this sequence because it is same molecule type
- continue;
- }
+ // skip this sequence because it is wrong molecule type
+ continue;
}
+ }
- // look for direct or indirect references in common
- DBRefEntry[] poss = nxt.getDBRefs(), cands = null;
- if (direct)
- {
- cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
- }
- else
- {
- poss = CrossRef.findXDbRefs(dna, poss); //
- cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
- }
- if (cands != null)
+ // look for direct or indirect references in common
+ DBRefEntry[] poss = nxt.getDBRefs();
+ List<DBRefEntry> cands = null;
+
+ // todo: indirect specifies we select either direct references to nxt
+ // that match xrf which is indirect to sequenceI, or indirect
+ // references to nxt that match xrf which is direct to sequenceI
+ cands = DBRefUtils.searchRefs(poss, xrf);
+ // else
+ // {
+ // poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss);
+ // cands = DBRefUtils.searchRefs(poss, xrf);
+ // }
+ if (!cands.isEmpty())
+ {
+ if (!rseqs.contains(nxt))
{
- if (!rseqs.contains(nxt))
+ found = true;
+ rseqs.add(nxt);
+ if (cf != null)
{
- rseqs.add(nxt);
- boolean foundmap = cf != null;
// don't search if we aren't given a codon map object
- for (int r = 0; foundmap && r < cands.length; r++)
+ for (DBRefEntry candidate : cands)
{
- if (cands[r].hasMap())
+ Mapping mapping = candidate.getMap();
+ if (mapping != null)
{
- if (cands[r].getMap().getTo() != null
- && cands[r].getMap().getMap().getFromRatio() != cands[r]
- .getMap().getMap().getToRatio())
+ MapList map = mapping.getMap();
+ if (mapping.getTo() != null
+ && map.getFromRatio() != map.getToRatio())
{
- foundmap = true;
// get sense of map correct for adding to product
// alignment.
- if (dna)
+ if (fromDna)
{
// map is from dna seq to a protein product
- cf.addMap(sequenceI, nxt, cands[r].getMap()
- .getMap());
+ cf.addMap(sequenceI, nxt, map);
}
else
{
// map should be from protein seq to its coding dna
- cf.addMap(nxt, sequenceI, cands[r].getMap()
- .getMap().getInverse());
+ cf.addMap(nxt, sequenceI, map.getInverse());
}
}
}
}
- // TODO: add mapping between sequences if necessary
- found = true;
}
+ // TODO: add mapping between sequences if necessary
}
-
}
}
}
}
return found;
}
-
- /**
- * precalculate different products that can be found for seqs in dataset and
- * return them.
- *
- * @param dna
- * @param seqs
- * @param dataset
- * @param fake
- * - don't actually build lists - just get types
- * @return public static Object[] buildXProductsList(boolean dna, SequenceI[]
- * seqs, AlignmentI dataset, boolean fake) { String types[] =
- * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,
- * dataset); if (types != null) { System.out.println("Xref Types for:
- * "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) {
- * System.out.println("Type: " + types[t]); SequenceI[] prod =
- * jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);
- * System.out.println("Found " + ((prod == null) ? "no" : "" +
- * prod.length) + " products"); if (prod!=null) { for (int p=0;
- * p<prod.length; p++) { System.out.println("Prod "+p+":
- * "+prod[p].getDisplayId(true)); } } } } else {
- * System.out.println("Trying getProducts for
- * "+al.getSequenceAt(0).getDisplayId(true));
- * System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot"));
- * // have a bash at finding the products amongst all the retrieved
- * sequences. SequenceI[] prod =
- * jalview.analysis.CrossRef.findXrefSequences(al
- * .getSequencesArray(), dna, null, ds); System.out.println("Found " +
- * ((prod == null) ? "no" : "" + prod.length) + " products"); if
- * (prod!=null) { // select non-equivalent sequences from dataset list
- * for (int p=0; p<prod.length; p++) { System.out.println("Prod "+p+":
- * "+prod[p].getDisplayId(true)); } } } }
- */
}
final private int dnaWidth;
- final private Alignment dataset;
+ final private AlignmentI dataset;
/*
* Working variables for the translation.
{
if (s != null)
{
- id = new String(s.toLowerCase());
+ id = s.toLowerCase();
}
else
{
.indexOf(s.charAt(id.length())) > -1)) : false;
}
}
+
+ /**
+ * toString method returns the wrapped sequence id. For debugging purposes
+ * only, behaviour not guaranteed not to change.
+ */
+ @Override
+ public String toString()
+ {
+ return id;
+ }
}
}
import jalview.util.MapList;
import jalview.util.MappingUtils;
+import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
/*
* Data bean to hold mappings from one sequence to another
*/
- private class SequenceToSequenceMapping
+ public class SequenceToSequenceMapping
{
private SequenceI fromSeq;
return String.format("From %s %s", fromSeq.getName(),
mapping.toString());
}
+
+ /**
+ * Returns a hashCode derived from the hashcodes of the mappings and fromSeq
+ *
+ * @see SequenceToSequenceMapping#hashCode()
+ */
+ @Override
+ public int hashCode()
+ {
+ return (fromSeq == null ? 0 : fromSeq.hashCode() * 31)
+ + mapping.hashCode();
+ }
+
+ /**
+ * Answers true if the objects hold the same mapping between the same two
+ * sequences
+ *
+ * @see Mapping#equals
+ */
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (!(obj instanceof SequenceToSequenceMapping))
+ {
+ return false;
+ }
+ SequenceToSequenceMapping that = (SequenceToSequenceMapping) obj;
+ if (this.mapping == null)
+ {
+ return that.mapping == null;
+ }
+ // TODO: can simplify by asserting fromSeq is a dataset sequence
+ return (this.fromSeq == that.fromSeq || (this.fromSeq != null
+ && that.fromSeq != null
+ && this.fromSeq.getDatasetSequence() != null && this.fromSeq
+ .getDatasetSequence() == that.fromSeq
+ .getDatasetSequence())) && this.mapping.equals(that.mapping);
+ }
+
+ public SequenceI getFromSeq()
+ {
+ return fromSeq;
+ }
+
+ public Mapping getMapping()
+ {
+ return mapping;
+ }
}
private List<SequenceToSequenceMapping> mappings;
/*
* if we already hold a mapping between these sequences, just add to it
+ * note that 'adding' a duplicate map does nothing; this protects against
+ * creating duplicate mappings in AlignedCodonFrame
*/
for (SequenceToSequenceMapping ssm : mappings)
{
}
return null;
}
+
+ /**
+ * Returns a hashcode derived from the list of sequence mappings
+ *
+ * @see SequenceToSequenceMapping#hashCode()
+ * @see AbstractList#hashCode()
+ */
+ @Override
+ public int hashCode()
+ {
+ return this.mappings.hashCode();
+ }
+
+ /**
+ * Two AlignedCodonFrame objects are equal if they hold the same ordered list
+ * of mappings
+ *
+ * @see SequenceToSequenceMapping#
+ */
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (!(obj instanceof AlignedCodonFrame))
+ {
+ return false;
+ }
+ return this.mappings.equals(((AlignedCodonFrame) obj).mappings);
+ }
+
+ public List<SequenceToSequenceMapping> getMappings()
+ {
+ return mappings;
+ }
}
*/
public class Alignment implements AlignmentI
{
- protected Alignment dataset;
+ private Alignment dataset;
protected List<SequenceI> sequences;
/*
* Share the same dataset sequence mappings (if any).
*/
- this.setCodonFrames(al.getCodonFrames());
+ if (dataset == null && al.getDataset() == null)
+ {
+ this.setCodonFrames(al.getCodonFrames());
+ }
}
/**
}
@Override
- public void setDataset(Alignment data)
+ public void setDataset(AlignmentI data)
{
if (dataset == null && data == null)
{
}
else if (dataset == null && data != null)
{
- dataset = data;
+ if (!(data instanceof Alignment))
+ {
+ throw new Error(
+ "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
+ }
+ dataset = (Alignment) data;
for (int i = 0; i < getHeight(); i++)
{
SequenceI currentSeq = getSequenceAt(i);
@Override
public List<AlignedCodonFrame> getCodonFrames()
{
+ // TODO: Fix this method to fix failing AlignedCodonFrame tests
+ // this behaviour is currently incorrect. method should return codon frames
+ // for just the alignment,
+ // selected from dataset
return dataset != null ? dataset.getCodonFrames() : codonFrameList;
}
addAnnotation(alan[a]);
}
+ // use add method
getCodonFrames().addAll(toappend.getCodonFrames());
List<SequenceGroup> sg = toappend.getGroups();
* @return Alignment containing dataset sequences or null of this is a
* dataset.
*/
- Alignment getDataset();
+ AlignmentI getDataset();
/**
* Set the associated dataset for the alignment, or create one.
* @param dataset
* The dataset alignment or null to construct one.
*/
- void setDataset(Alignment dataset);
+ void setDataset(AlignmentI dataset);
/**
* pads sequences with gaps (to ensure the set looks like an alignment)
* otherwise the versions have to match
*/
String otherVersion = other.getVersion();
+
if ((version == null || version.equals("0") || version.endsWith(":0"))
&& otherVersion != null)
{
}
else
{
- if (!version.equalsIgnoreCase(otherVersion))
+ if (version != null
+ && (otherVersion == null || !version
+ .equalsIgnoreCase(otherVersion)))
{
return false;
}
/**
* Equals that compares both the to references and MapList mappings.
*
- * @param other
+ * @param o
* @return
+ * @see MapList#equals
*/
@Override
public boolean equals(Object o)
{
- // TODO should override Object.hashCode() to ensure that equal objects have
- // equal hashcodes
if (o == null || !(o instanceof Mapping))
{
return false;
}
/**
+ * Returns a hashCode made from the sequence and maplist
+ */
+ @Override
+ public int hashCode()
+ {
+ int hashCode = (this.to == null ? 1 : this.to.hashCode());
+ if (this.map != null)
+ {
+ hashCode = hashCode * 31 + this.map.hashCode();
+ }
+
+ return hashCode;
+ }
+
+ /**
* get the 'initial' position in the associated sequence for a position in the
* mapped reference frame
*
return new Sequence(this);
}
+ private boolean _isNa;
+
+ private long _seqhash = 0;
+
+ @Override
+ public boolean isProtein()
+ {
+ if (datasetSequence != null)
+ {
+ return datasetSequence.isProtein();
+ }
+ if (_seqhash != sequence.hashCode())
+ {
+ _seqhash = sequence.hashCode();
+ _isNa=jalview.util.Comparison.isNucleotide(new SequenceI[] { this });
+ }
+ return !_isNa;
+ };
+
/*
* (non-Javadoc)
*
public int[] findPositionMap();
/**
+ *
+ * @return true if sequence is composed of amino acid characters
+ */
+ public boolean isProtein();
+
+ /**
* Delete a range of aligned sequence columns, creating a new dataset sequence
* if necessary and adjusting start and end positions accordingly.
*
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
new int[] { 1, dna.getLength() }, 1, 1));
+
+ /*
+ * transform EMBL Database refs to canonical form
+ */
if (dbRefs != null)
{
for (DBRefEntry dbref : dbRefs)
{
+ dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource()));
dna.addDBRef(dbref);
}
}
{
for (EmblFeature feature : features)
{
- if (feature.dbRefs != null)
- {
- for (DBRefEntry dbref : feature.dbRefs)
- {
- dna.addDBRef(dbref);
- }
- }
if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))
{
parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
* parent dna sequence for this record
* @param peptides
* list of protein product sequences for Embl entry
+ * @param matcher
+ * helper to match xrefs in already retrieved sequences
*/
void parseCodingFeature(EmblFeature feature, String sourceDb,
SequenceI dna, List<SequenceI> peptides, SequenceIdMatcher matcher)
boolean mappingUsed = false;
for (DBRefEntry ref : feature.dbRefs)
{
+ /*
+ * ensure UniProtKB/Swiss-Prot converted to UNIPROT
+ */
ref.setSource(DBRefUtils.getCanonicalName(ref.getSource()));
if (ref.getSource().equals(DBRefSource.UNIPROT))
{
}
/**
- * Searches selected sequences for xRef products and builds the Show
- * Cross-References menu (formerly called Show Products)
+ * Searches the alignment sequences for xRefs and builds the Show
+ * Cross-References menu (formerly called Show Products), with database
+ * sources for which cross-references are found (protein sources for a
+ * nucleotide alignment and vice versa)
*
- * @return true if Show Cross-references menu should be enabled.
+ * @return true if Show Cross-references menu should be enabled
*/
public boolean canShowProducts()
{
- SequenceI[] selection = viewport.getSequenceSelection();
+ SequenceI[] seqs = viewport.getAlignment().getSequencesArray();
AlignmentI dataset = viewport.getAlignment().getDataset();
boolean showp = false;
try
{
showProducts.removeAll();
final boolean dna = viewport.getAlignment().isNucleotide();
- String[] ptypes = (selection == null || selection.length == 0) ? null
- : CrossRef.findSequenceXrefTypes(dna, selection, dataset);
+ List<String> ptypes = (seqs == null || seqs.length == 0) ? null
+ : new CrossRef(seqs, dataset)
+ .findXrefSourcesForSequences(dna);
- for (int t = 0; ptypes != null && t < ptypes.length; t++)
+ for (final String source : ptypes)
{
showp = true;
final AlignFrame af = this;
- final String source = ptypes[t];
- JMenuItem xtype = new JMenuItem(ptypes[t]);
+ JMenuItem xtype = new JMenuItem(source);
xtype.addActionListener(new ActionListener()
{
-
@Override
public void actionPerformed(ActionEvent e)
{
showProductsFor(af.viewport.getSequenceSelection(), dna, source);
}
-
});
showProducts.add(xtype);
}
showProducts.setEnabled(showp);
} catch (Exception e)
{
- jalview.bin.Cache.log
+ Cache.log
.warn("canShowProducts threw an exception - please report to help@jalview.org",
e);
return false;
* @param source
* the database to show cross-references for
*/
- protected void showProductsFor(final SequenceI[] sel, final boolean dna,
+ protected void showProductsFor(final SequenceI[] sel, final boolean _odna,
final String source)
{
Runnable foo = new Runnable()
{
AlignmentI alignment = AlignFrame.this.getViewport()
.getAlignment();
- AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source,
- alignment);
- if (xrefs != null)
+ AlignmentI dataset = alignment.getDataset() == null ? alignment
+ : alignment.getDataset();
+ boolean dna = alignment.isNucleotide();
+ if (_odna!=dna)
{
- /*
- * get display scheme (if any) to apply to features
- */
- FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
- .getFeatureColourScheme(source);
+ System.err
+ .println("Conflict: showProducts for alignment originally "
+ + "thought to be "
+ + (_odna ? "DNA" : "Protein")
+ + " now searching for "
+ + (dna ? "DNA" : "Protein") + " Context.");
+ }
+ AlignmentI xrefs = new CrossRef(sel, dataset)
+ .findXrefSequences(source, dna);
+ if (xrefs == null)
+ {
+ return;
+ }
+ /*
+ * get display scheme (if any) to apply to features
+ */
+ FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
+ .getFeatureColourScheme(source);
- AlignmentI al = makeCrossReferencesAlignment(
- alignment.getDataset(), xrefs);
+ AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
+ xrefs);
- AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH,
+ AlignFrame newFrame = new AlignFrame(xrefsAlignment, DEFAULT_WIDTH,
+ DEFAULT_HEIGHT);
+ if (Cache.getDefault("HIDE_INTRONS", true))
+ {
+ newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
+ }
+ String newtitle = String.format("%s %s %s", MessageManager
+ .getString(dna ? "label.proteins" : "label.nucleotides"),
+ MessageManager.getString("label.for"), getTitle());
+ newFrame.setTitle(newtitle);
+
+ if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
+ {
+ /*
+ * split frame display is turned off in preferences file
+ */
+ Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
DEFAULT_HEIGHT);
- if (Cache.getDefault("HIDE_INTRONS", true))
- {
- newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
- }
- String newtitle = String.format("%s %s %s",
- MessageManager.getString(dna ? "label.proteins"
- : "label.nucleotides"), MessageManager
- .getString("label.for"), getTitle());
- newFrame.setTitle(newtitle);
+ return; // via finally clause
+ }
- if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
+ /*
+ * Make a copy of this alignment (sharing the same dataset
+ * sequences). If we are DNA, drop introns and update mappings
+ */
+ AlignmentI copyAlignment = null;
+ final SequenceI[] sequenceSelection = AlignFrame.this.viewport
+ .getSequenceSelection();
+ // List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
+ boolean copyAlignmentIsAligned = false;
+ if (dna)
+ {
+ copyAlignment = AlignmentUtils.makeCdsAlignment(
+ sequenceSelection, dataset);
+ if (copyAlignment.getHeight() == 0)
{
- /*
- * split frame display is turned off in preferences file
- */
- Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
- DEFAULT_HEIGHT);
- return; // via finally clause
+ System.err.println("Failed to make CDS alignment");
}
/*
- * Make a copy of this alignment (sharing the same dataset
- * sequences). If we are DNA, drop introns and update mappings
+ * pending getting Embl transcripts to 'align',
+ * we are only doing this for Ensembl
*/
- AlignmentI copyAlignment = null;
- final SequenceI[] sequenceSelection = AlignFrame.this.viewport
- .getSequenceSelection();
- List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
- boolean copyAlignmentIsAligned = false;
- if (dna)
- {
- copyAlignment = AlignmentUtils.makeCdsAlignment(
- sequenceSelection, cf, alignment);
- if (copyAlignment.getHeight() == 0)
- {
- System.err.println("Failed to make CDS alignment");
- }
- al.getCodonFrames().clear();
- al.addCodonFrames(copyAlignment.getCodonFrames());
- al.addCodonFrames(cf);
-
- /*
- * pending getting Embl transcripts to 'align',
- * we are only doing this for Ensembl
- */
- // TODO proper criteria for 'can align as cdna'
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
- || AlignmentUtils.looksLikeEnsembl(alignment))
- {
- copyAlignment.alignAs(alignment);
- copyAlignmentIsAligned = true;
- }
- }
- else
+ // TODO proper criteria for 'can align as cdna'
+ if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+ || AlignmentUtils.looksLikeEnsembl(alignment))
{
- copyAlignment = AlignmentUtils.makeCopyAlignment(
- sequenceSelection, xrefs.getSequencesArray());
- copyAlignment.addCodonFrames(cf);
- al.addCodonFrames(copyAlignment.getCodonFrames());
- al.addCodonFrames(cf);
+ copyAlignment.alignAs(alignment);
+ copyAlignmentIsAligned = true;
}
- copyAlignment.setGapCharacter(AlignFrame.this.viewport
- .getGapCharacter());
+ }
+ else
+ {
+ copyAlignment = AlignmentUtils.makeCopyAlignment(
+ sequenceSelection, xrefs.getSequencesArray());
+ }
+ copyAlignment.setGapCharacter(AlignFrame.this.viewport
+ .getGapCharacter());
- StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
- ssm.registerMappings(cf);
+ StructureSelectionManager ssm = StructureSelectionManager
+ .getStructureSelectionManager(Desktop.instance);
- if (copyAlignment.getHeight() <= 0)
- {
- System.err.println("No Sequences generated for xRef type "
- + source);
- return;
- }
+ /*
+ * register any new mappings for sequence mouseover etc
+ * (will not duplicate any previously registered mappings)
+ */
+ ssm.registerMappings(dataset.getCodonFrames());
+
+ if (copyAlignment.getHeight() <= 0)
+ {
+ System.err.println("No Sequences generated for xRef type "
+ + source);
+ return;
+ }
+ /*
+ * align protein to dna
+ */
+ if (dna && copyAlignmentIsAligned)
+ {
+ xrefsAlignment.alignAs(copyAlignment);
+ }
+ else
+ {
/*
- * align protein to dna
+ * align cdna to protein - currently only if
+ * fetching and aligning Ensembl transcripts!
*/
- if (dna && copyAlignmentIsAligned)
+ if (DBRefSource.ENSEMBL.equalsIgnoreCase(source))
{
- al.alignAs(copyAlignment);
- }
- else
- {
- /*
- * align cdna to protein - currently only if
- * fetching and aligning Ensembl transcripts!
- */
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source))
- {
- copyAlignment.alignAs(al);
- }
+ copyAlignment.alignAs(xrefsAlignment);
}
+ }
- AlignFrame copyThis = new AlignFrame(copyAlignment,
- AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
- copyThis.setTitle(AlignFrame.this.getTitle());
+ AlignFrame copyThis = new AlignFrame(copyAlignment,
+ AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+ copyThis.setTitle(AlignFrame.this.getTitle());
- boolean showSequenceFeatures = viewport
- .isShowSequenceFeatures();
- newFrame.setShowSeqFeatures(showSequenceFeatures);
- copyThis.setShowSeqFeatures(showSequenceFeatures);
- FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer();
+ boolean showSequenceFeatures = viewport.isShowSequenceFeatures();
+ newFrame.setShowSeqFeatures(showSequenceFeatures);
+ copyThis.setShowSeqFeatures(showSequenceFeatures);
+ FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas
+ .getFeatureRenderer();
- /*
- * copy feature rendering settings to split frame
- */
- newFrame.alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer()
- .transferSettings(myFeatureStyling);
- copyThis.alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer()
- .transferSettings(myFeatureStyling);
+ /*
+ * copy feature rendering settings to split frame
+ */
+ newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+ .transferSettings(myFeatureStyling);
+ copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+ .transferSettings(myFeatureStyling);
- /*
- * apply 'database source' feature configuration
- * if any was found
- */
- // TODO is this the feature colouring for the original
- // alignment or the fetched xrefs? either could be Ensembl
- newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
- copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
-
- SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
- dna ? newFrame : copyThis);
- newFrame.setVisible(true);
- copyThis.setVisible(true);
- String linkedTitle = MessageManager
- .getString("label.linked_view_title");
- Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
- sf.adjustDivider();
- }
- } catch (Exception e)
- {
- Cache.log.error("Exception when finding crossreferences", e);
+ /*
+ * apply 'database source' feature configuration
+ * if any was found
+ */
+ // TODO is this the feature colouring for the original
+ // alignment or the fetched xrefs? either could be Ensembl
+ newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
+ copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
+
+ SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
+ dna ? newFrame : copyThis);
+ newFrame.setVisible(true);
+ copyThis.setVisible(true);
+ String linkedTitle = MessageManager
+ .getString("label.linked_view_title");
+ Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+ sf.adjustDivider();
} catch (OutOfMemoryError e)
{
new OOMWarning("whilst fetching crossreferences", e);
}
/**
- * Makes an alignment containing the given sequences. If this is of the
- * same type as the given dataset (nucleotide/protein), then the new
- * alignment shares the same dataset, and its dataset sequences are added
- * to it. Otherwise a new dataset sequence is created for the
- * cross-references.
+ * Makes an alignment containing the given sequences, and adds them to the
+ * given dataset, which is also set as the dataset for the new alignment
*
* @param dataset
* @param seqs
protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
AlignmentI seqs)
{
- boolean sameType = dataset.isNucleotide() == seqs.isNucleotide();
-
SequenceI[] sprods = new SequenceI[seqs.getHeight()];
for (int s = 0; s < sprods.length; s++)
{
sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
- if (sameType)
+ if (dataset.getSequences() == null
+ || !dataset.getSequences().contains(
+ sprods[s].getDatasetSequence()))
{
- if (dataset.getSequences() == null
- || !dataset.getSequences().contains(
- sprods[s].getDatasetSequence()))
- {
- dataset.addSequence(sprods[s].getDatasetSequence());
- }
+ dataset.addSequence(sprods[s].getDatasetSequence());
}
sprods[s].updatePDBIds();
}
Alignment al = new Alignment(sprods);
- if (sameType)
- {
- al.setDataset((Alignment) dataset);
- }
- else
- {
- al.createDatasetAlignment();
- }
+ al.setDataset(dataset);
return al;
}
SequenceI[] orderedSeqs = tmpseqs
.toArray(new SequenceI[tmpseqs.size()]);
- Alignment al = new Alignment(orderedSeqs);
+ AlignmentI al = new Alignment(orderedSeqs);
if (referenceseqForView != null)
{
}
AlignFrame loadViewport(String file, JSeq[] JSEQ,
- List<SequenceI> hiddenSeqs, Alignment al,
+ List<SequenceI> hiddenSeqs, AlignmentI al,
JalviewModelSequence jms, Viewport view, String uniqueSeqSetId,
String viewId, List<JvAnnotRow> autoAlan)
{
}
private ColourSchemeI constructAnnotationColour(
- AnnotationColours viewAnnColour, AlignFrame af, Alignment al,
+ AnnotationColours viewAnnColour, AlignFrame af, AlignmentI al,
JalviewModelSequence jms, boolean checkGroupAnnColour)
{
boolean propagateAnnColour = false;
return cs;
}
- private void reorderAutoannotation(AlignFrame af, Alignment al,
+ private void reorderAutoannotation(AlignFrame af, AlignmentI al,
List<JvAnnotRow> autoAlan)
{
// copy over visualization settings for autocalculated annotation in the
}
}
- private void recoverDatasetFor(SequenceSet vamsasSet, Alignment al,
+ private void recoverDatasetFor(SequenceSet vamsasSet, AlignmentI al,
boolean ignoreUnrefed)
{
- jalview.datamodel.Alignment ds = getDatasetFor(vamsasSet.getDatasetId());
+ jalview.datamodel.AlignmentI ds = getDatasetFor(vamsasSet
+ .getDatasetId());
Vector dseqs = null;
if (ds == null)
{
* TODO use AlignmentI here and in related methods - needs
* AlignmentI.getDataset() changed to return AlignmentI instead of Alignment
*/
- Hashtable<String, Alignment> datasetIds = null;
+ Hashtable<String, AlignmentI> datasetIds = null;
- IdentityHashMap<Alignment, String> dataset2Ids = null;
+ IdentityHashMap<AlignmentI, String> dataset2Ids = null;
- private Alignment getDatasetFor(String datasetId)
+ private AlignmentI getDatasetFor(String datasetId)
{
if (datasetIds == null)
{
- datasetIds = new Hashtable<String, Alignment>();
+ datasetIds = new Hashtable<String, AlignmentI>();
return null;
}
if (datasetIds.containsKey(datasetId))
return null;
}
- private void addDatasetRef(String datasetId, Alignment dataset)
+ private void addDatasetRef(String datasetId, AlignmentI dataset)
{
if (datasetIds == null)
{
- datasetIds = new Hashtable<String, Alignment>();
+ datasetIds = new Hashtable<String, AlignmentI>();
}
datasetIds.put(datasetId, dataset);
}
* @param dataset
* @return
*/
- private String getDatasetIdRef(Alignment dataset)
+ private String getDatasetIdRef(AlignmentI dataset)
{
if (dataset.getDataset() != null)
{
// make a new datasetId and record it
if (dataset2Ids == null)
{
- dataset2Ids = new IdentityHashMap<Alignment, String>();
+ dataset2Ids = new IdentityHashMap<AlignmentI, String>();
}
else
{
package jalview.gui;
import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.SeqCigar;
{
// AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]);
- Alignment al = new Alignment((SequenceI[]) alAndColsel[0]);
- Alignment dataset = (av != null && av.getAlignment() != null) ? av
+ AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]);
+ AlignmentI dataset = (av != null && av.getAlignment() != null) ? av
.getAlignment().getDataset() : null;
if (dataset != null)
{
Cache.log.info(
"Error retrieving " + accession
+ " from " + proxy.getDbName(), e);
- } finally
- {
- return success;
}
+ return success;
}
/**
for (String q : queries)
{
- DBRefEntry[] found = null;
DBRefEntry dbr = new DBRefEntry();
dbr.setSource(proxy.getDbSource());
dbr.setVersion(null);
{
if (rs[r] != null)
{
- found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
- if (found != null && found.length > 0)
+ List<DBRefEntry> found = DBRefUtils.searchRefs(rs[r].getDBRefs(),
+ accId);
+ if (!found.isEmpty())
{
rfound = true;
break;
{
// AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]);
- Alignment al = new Alignment((SequenceI[]) alAndColsel[0]);
- Alignment dataset = (av != null && av.getAlignment() != null) ? av
+ AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]);
+ AlignmentI dataset = (av != null && av.getAlignment() != null) ? av
.getAlignment().getDataset() : null;
if (dataset != null)
{
}
/**
+ * Overloaded method signature to test whether a single sequence is nucleotide
+ * (that is, more than 85% CGTA)
+ *
+ * @param seq
+ * @return
+ */
+ public static final boolean isNucleotide(SequenceI seq)
+ {
+ return isNucleotide(new SequenceI[] { seq });
+ }
+
+ /**
* Answers true if more than 85% of the sequence residues (ignoring gaps) are
* A, G, C, T or U, else false. This is just a heuristic guess and may give a
* wrong answer (as AGCT are also amino acid codes).
}
/**
+ * Returns those DBRefEntry objects whose source identifier (once converted to
+ * Jalview's canonical form) is in the list of sources to search for. Returns
+ * null if no matches found.
*
* @param dbrefs
- * array of DBRef objects to search
+ * DBRefEntry objects to search
* @param sources
- * String[] array of source DBRef IDs to retrieve
+ * array of sources to select
* @return
*/
public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
}
/**
- * Returns an array of those references that match the given entry, or null if
- * no matches. Currently uses a comparator which matches if
+ * Returns a (possibly empty) list of those references that match the given
+ * entry. Currently uses a comparator which matches if
* <ul>
* <li>database sources are the same</li>
* <li>accession ids are the same</li>
* pattern to match
* @return
*/
- public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry)
+ public static List<DBRefEntry> searchRefs(DBRefEntry[] ref,
+ DBRefEntry entry)
{
return searchRefs(ref, entry,
matchDbAndIdAndEitherMapOrEquivalentMapList);
}
/**
- * Returns an array of those references that match the given accession id
+ * Returns a list of those references that match the given accession id
* <ul>
* <li>database sources are the same</li>
* <li>accession ids are the same</li>
* <li>both have no mapping, or the mappings are the same</li>
* </ul>
*
- * @param ref
+ * @param refs
* Set of references to search
- * @param entry
- * pattern to match
+ * @param accId
+ * accession id to match
* @return
*/
- public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+ public static List<DBRefEntry> searchRefs(DBRefEntry[] refs, String accId)
{
- return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+ return searchRefs(refs, new DBRefEntry("", "", accId), matchId);
}
/**
- * Returns an array of those references that match the given entry, according
- * to the given comparator. Returns null if no matches.
+ * Returns a (possibly empty) list of those references that match the given
+ * entry, according to the given comparator.
*
* @param refs
* an array of database references to search
* @param comparator
* @return
*/
- static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry,
+ static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
DbRefComp comparator)
{
+ List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
if (refs == null || entry == null)
{
- return null;
+ return rfs;
}
- List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
for (int i = 0; i < refs.length; i++)
{
if (comparator.matches(entry, refs[i]))
rfs.add(refs[i]);
}
}
- return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]);
+ return rfs;
}
interface DbRefComp
};
/**
- * accession ID and DB must be identical. Version is ignored. No map on either
- * or map but no maplist on either or maplist of map on a is equivalent to the
- * maplist of map on b.
+ * accession ID and DB must be identical, or null on a. Version is ignored. No
+ * map on either or map but no maplist on either or maplist of map on a is
+ * equivalent to the maplist of map on b.
*/
public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
{
&& refb.getSource().equals(refa.getSource()))
{
// We dont care about version
- if (refa.getAccessionId() != null && refb.getAccessionId() != null
- && refb.getAccessionId().equals(refa.getAccessionId()))
+
+ if (refa.getAccessionId() == null
+ || refa.getAccessionId().equals(refb.getAccessionId()))
{
if (refa.getMap() == null || refb.getMap() == null)
{
|| (refb.getMap().getMap() != null
&& refa.getMap().getMap() != null && (refb
.getMap().getMap().equals(refa.getMap().getMap()))))
- { // getMap().getMap().containsEither(false,refa.getMap().getMap())
+ {
return true;
}
}
return (o1 == null ? o2.equals(o1) : o1.equals(o2));
}
+ /**
+ * Selects just the DNA or protein references from a set of references
+ *
+ * @param selectDna
+ * if true, select references to 'standard' DNA databases, else to
+ * 'standard' peptide databases
+ * @param refs
+ * a set of references to select from
+ * @return
+ */
+ public static DBRefEntry[] selectDbRefs(boolean selectDna,
+ DBRefEntry[] refs)
+ {
+ return selectRefs(refs, selectDna ? DBRefSource.DNACODINGDBS
+ : DBRefSource.PROTEINDBS);
+ // could attempt to find other cross
+ // refs here - ie PDB xrefs
+ // (not dna, not protein seq)
+ }
+
+ /**
+ * Returns the (possibly empty) list of those supplied dbrefs which have the
+ * specified source databse
+ *
+ * @param dbRefs
+ * @param source
+ * @return
+ */
+ public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+ String source)
+ {
+ List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
+ if (dbRefs != null && source != null)
+ {
+ for (DBRefEntry dbref : dbRefs)
+ {
+ if (source.equals(dbref.getSource()))
+ {
+ matches.add(dbref);
+ }
+ }
+ }
+ return matches;
+ }
+
}
@Override
public boolean equals(Object o)
{
- // TODO should also override hashCode to ensure equal objects have equal
- // hashcodes
if (o == null || !(o instanceof MapList))
{
return false;
}
/**
+ * Returns a hashcode made from the fromRatio, toRatio, and from/to ranges
+ */
+ @Override
+ public int hashCode()
+ {
+ int hashCode = 31 * fromRatio;
+ hashCode = 31 * hashCode + toRatio;
+ hashCode = 31 * hashCode + fromShifts.toArray().hashCode();
+ hashCode = 31 * hashCode + toShifts.toArray().hashCode();
+ return hashCode;
+ }
+
+ /**
* Returns the 'from' ranges as {[start1, end1], [start2, end2], ...}
*
* @return
{
/*
* note lowest and highest values - bearing in mind the
- * direction may be revesed
+ * direction may be reversed
*/
fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1]));
fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1]));
*/
public void addMapList(MapList map)
{
+ if (this.equals(map))
+ {
+ return;
+ }
this.fromLowest = Math.min(fromLowest, map.fromLowest);
this.toLowest = Math.min(toLowest, map.toLowest);
this.fromHighest = Math.max(fromHighest, map.fromHighest);
}
return forwardStrand;
}
+
}
import jalview.util.MessageManager;
import jalview.viewmodel.seqfeatures.FeatureRendererSettings;
-import java.util.LinkedHashSet;
+import java.util.ArrayList;
import java.util.List;
-import java.util.Set;
public abstract class AWSThread extends Thread
{
/**
* dataset sequence relationships to be propagated onto new results
*/
- protected Set<AlignedCodonFrame> codonframe = null;
+ protected List<AlignedCodonFrame> codonframe = null;
/**
* are there jobs still running in this thread.
.getCodonFrames();
if (cf != null)
{
- codonframe = new LinkedHashSet<AlignedCodonFrame>();
+ codonframe = new ArrayList<AlignedCodonFrame>();
codonframe.addAll(cf);
}
}
--- /dev/null
+package jalview.ws;
+
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+public class SequenceFetcherFactory
+{
+
+ private static SequenceFetcher instance;
+
+ /**
+ * Returns a new SequenceFetcher object, or a mock object if one has been set
+ *
+ * @return
+ */
+ public static ASequenceFetcher getSequenceFetcher()
+ {
+ return instance == null ? new SequenceFetcher() : instance;
+ }
+
+ /**
+ * Set the instance object to use (intended for unit testing with mock
+ * objects).
+ *
+ * Be sure to reset to null in the tearDown method of any tests!
+ *
+ * @param sf
+ */
+ public static void setSequenceFetcher(SequenceFetcher sf)
+ {
+ instance = sf;
+ }
+}
*/
package jalview.ws.jws1;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
-import ext.vamsas.MuscleWS;
import ext.vamsas.MuscleWSServiceLocator;
import ext.vamsas.MuscleWSSoapBindingStub;
import ext.vamsas.ServiceHandle;
public MsaWSClient(ext.vamsas.ServiceHandle sh, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
super();
}
private void startMsaWSClient(String altitle, AlignmentView msa,
- boolean submitGaps, boolean preserveOrder, Alignment seqdataset)
+ boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset)
{
if (!locateWebService())
{
try
{
- this.server = (MuscleWS) loc.getMuscleWS(new java.net.URL(WsURL));
+ this.server = loc.getMuscleWS(new java.net.URL(WsURL));
((MuscleWSSoapBindingStub) this.server).setTimeout(60000); // One minute
// timeout
} catch (Exception ex)
return (WebServiceName.indexOf("lustal") > -1); // cheat!
}
+ @Override
public void attachWSMenuEntry(JMenu msawsmenu,
final ServiceHandle serviceHandle, final AlignFrame alignFrame)
{
method.setToolTipText(WsURL);
method.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
AlignmentView msa = alignFrame.gatherSequencesForAlignment();
methodR.setToolTipText(WsURL);
methodR.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
AlignmentView msa = alignFrame.gatherSequencesForAlignment();
import jalview.analysis.AlignSeq;
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
*
* @return true if getAlignment will return a valid alignment result.
*/
+ @Override
public boolean hasResults()
{
if (subjobComplete && result != null && result.isFinished()
*
* @return boolean true if job can be submitted.
*/
+ @Override
public boolean hasValidInput()
{
if (seqs.getSeqs() != null)
String alTitle; // name which will be used to form new alignment window.
- Alignment dataset; // dataset to which the new alignment will be
+ AlignmentI dataset; // dataset to which the new alignment will be
// associated.
MsaWSThread(ext.vamsas.MuscleWS server, String wsUrl,
WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame,
String wsname, String title, AlignmentView _msa, boolean subgaps,
- boolean presorder, Alignment seqset)
+ boolean presorder, AlignmentI seqset)
{
this(server, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder);
OutputHeader = wsInfo.getProgressText();
}
}
+ @Override
public boolean isCancellable()
{
return true;
}
+ @Override
public void cancelJob()
{
if (!jobComplete && jobs != null)
}
}
+ @Override
public void pollJob(AWsJob job) throws Exception
{
((MsaWSJob) job).result = server.getResult(((MsaWSJob) job).getJobId());
}
+ @Override
public void StartJob(AWsJob job)
{
if (!(job instanceof MsaWSJob))
return msa;
}
+ @Override
public void parseResult()
{
int results = 0; // number of result sets received
wsInfo.showResultsNewFrame
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(true);
wsInfo.mergeResults
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(false);
while (j < l)
{
if (((AlignmentOrder) alorders.get(i))
- .equals(((AlignmentOrder) alorders.get(j))))
+ .equals((alorders.get(j))))
{
alorders.remove(j);
l--;
}
}
+ @Override
public boolean canMergeResults()
{
return false;
*/
package jalview.ws.jws1;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
-import ext.vamsas.SeqSearchI;
import ext.vamsas.SeqSearchServiceLocator;
import ext.vamsas.SeqSearchServiceSoapBindingStub;
import ext.vamsas.ServiceHandle;
public SeqSearchWSClient(ext.vamsas.ServiceHandle sh, String altitle,
jalview.datamodel.AlignmentView msa, String db,
- Alignment seqdataset, AlignFrame _alignFrame)
+ AlignmentI seqdataset, AlignFrame _alignFrame)
{
super();
alignFrame = _alignFrame;
}
private void startSeqSearchClient(String altitle, AlignmentView msa,
- String db, Alignment seqdataset)
+ String db, AlignmentI seqdataset)
{
if (!locateWebService())
{
try
{
- this.server = (SeqSearchI) loc.getSeqSearchService(new java.net.URL(
+ this.server = loc.getSeqSearchService(new java.net.URL(
WsURL));
((SeqSearchServiceSoapBindingStub) this.server).setTimeout(60000); // One
// minute
return dbs;
}
+ @Override
public void attachWSMenuEntry(JMenu wsmenu, final ServiceHandle sh,
final AlignFrame af)
{
method.setToolTipText(sh.getEndpointURL());
method.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
// use same input gatherer as for secondary structure prediction
final String searchdb = dbs[db];
method.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
AlignmentView msa = af.gatherSeqOrMsaForSecStrPrediction();
import jalview.api.FeatureColourI;
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
*
* @return null or { Alignment(+features and annotation), NewickFile)}
*/
- public Object[] getAlignment(Alignment dataset,
+ public Object[] getAlignment(AlignmentI dataset,
Map<String, FeatureColourI> featureColours)
{
String alTitle; // name which will be used to form new alignment window.
- Alignment dataset; // dataset to which the new alignment will be
+ AlignmentI dataset; // dataset to which the new alignment will be
// associated.
SeqSearchWSThread(ext.vamsas.SeqSearchI server, String wsUrl,
WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame,
String wsname, String title, AlignmentView _msa, String db,
- Alignment seqset)
+ AlignmentI seqset)
{
this(server, wsUrl, wsinfo, alFrame, _msa, wsname, db);
OutputHeader = wsInfo.getProgressText();
*/
package jalview.ws.jws2;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
public MsaWSClient(Jws2Instance sh, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
this(sh, null, null, false, altitle, msa, submitGaps, preserveOrder,
public MsaWSClient(Jws2Instance sh, WsParamSetI preset, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
this(sh, preset, null, false, altitle, msa, submitGaps, preserveOrder,
public MsaWSClient(Jws2Instance sh, WsParamSetI preset,
List<Argument> arguments, boolean editParams, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
super(_alignFrame, preset, arguments);
}
private void startMsaWSClient(String altitle, AlignmentView msa,
- boolean submitGaps, boolean preserveOrder, Alignment seqdataset)
+ boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset)
{
// if (!locateWebService())
// {
*
* @return true if getAlignment will return a valid alignment result.
*/
+ @Override
public boolean hasResults()
{
if (subjobComplete
*
* @return boolean true if job can be submitted.
*/
+ @Override
public boolean hasValidInput()
{
// TODO: get attributes for this MsaWS instance to check if it can do two
String alTitle; // name which will be used to form new alignment window.
- Alignment dataset; // dataset to which the new alignment will be
+ AlignmentI dataset; // dataset to which the new alignment will be
// associated.
String wsUrl, WebserviceInfo wsinfo,
jalview.gui.AlignFrame alFrame, String wsname, String title,
AlignmentView _msa, boolean subgaps, boolean presorder,
- Alignment seqset)
+ AlignmentI seqset)
{
this(server2, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder);
OutputHeader = wsInfo.getProgressText();
return validInput;
}
+ @Override
public boolean isCancellable()
{
return true;
}
+ @Override
public void cancelJob()
{
if (!jobComplete && jobs != null)
}
}
+ @Override
public void pollJob(AWsJob job) throws Exception
{
// TODO: investigate if we still need to cast here in J1.6
return changed;
}
+ @Override
public void StartJob(AWsJob job)
{
Exception lex = null;
}
}
+ @Override
public void parseResult()
{
long progbar = System.currentTimeMillis();
wsInfo.showResultsNewFrame
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(true);
wsInfo.mergeResults
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(false);
}
}
+ @Override
public boolean canMergeResults()
{
return false;
/**
* Constructor
*/
- public ASequenceFetcher()
+ protected ASequenceFetcher()
{
super();
* if true, only fetch from nucleotide data sources, else peptide
* @return
*/
- public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna)
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
{
Vector<SequenceI> rseqs = new Vector<SequenceI>();
Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
- for (int r = 0; r < refs.length; r++)
+ for (DBRefEntry ref : refs)
{
- if (!queries.containsKey(refs[r].getSource()))
+ if (!queries.containsKey(ref.getSource()))
{
- queries.put(refs[r].getSource(), new ArrayList<String>());
+ queries.put(ref.getSource(), new ArrayList<String>());
}
- List<String> qset = queries.get(refs[r].getSource());
- if (!qset.contains(refs[r].getAccessionId()))
+ List<String> qset = queries.get(ref.getSource());
+ if (!qset.contains(ref.getAccessionId()))
{
- qset.add(refs[r].getAccessionId());
+ qset.add(ref.getAccessionId());
}
}
Enumeration<String> e = queries.keys();
for (int is = 0; is < seqs.length; is++)
{
rseqs.addElement(seqs[is]);
- DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is]
+ List<DBRefEntry> frefs = DBRefUtils.searchRefs(seqs[is]
.getDBRefs(), new DBRefEntry(db, null, null));
- if (frefs != null)
+ for (DBRefEntry dbr : frefs)
{
- for (DBRefEntry dbr : frefs)
- {
- queriesFound.add(dbr.getAccessionId());
- queriesMade.remove(dbr.getAccessionId());
- }
+ queriesFound.add(dbr.getAccessionId());
+ queriesMade.remove(dbr.getAccessionId());
}
seqs[is] = null;
}
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
dna.setDataset(null);
- List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
/*
* execute method under test:
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, mappings, dna);
+ dna1, dna2 }, dna.getDataset());
assertEquals(2, cds.getSequences().size());
- assertEquals("GGGTTT", cds.getSequenceAt(0)
- .getSequenceAsString());
- assertEquals("GGGTTTCCC", cds.getSequenceAt(1)
- .getSequenceAsString());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
/*
* verify shared, extended alignment dataset
.contains(cds.getSequenceAt(1).getDatasetSequence()));
/*
- * Verify mappings from CDS to peptide and cDNA to CDS
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
* the mappings are on the shared alignment dataset
*/
- assertSame(dna.getCodonFrames(), cds.getCodonFrames());
- List<AlignedCodonFrame> cdsMappings = cds.getCodonFrames();
- assertEquals(2, cdsMappings.size());
-
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ /*
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
+ */
+ assertEquals(6, cdsMappings.size());
+
/*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep1 (dna to pep1, cds to pep1)
* Mapping from pep1 to GGGTTT in first new exon sequence
*/
- List<AlignedCodonFrame> pep1Mapping = MappingUtils
+ List<AlignedCodonFrame> pep1Mappings = MappingUtils
.findMappingsForSequence(pep1, cdsMappings);
- assertEquals(1, pep1Mapping.size());
+ assertEquals(2, pep1Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
+ assertEquals(1, mappings.size());
+
// map G to GGG
- SearchResults sr = MappingUtils
- .buildSearchResults(pep1, 1, cdsMappings);
+ SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
- sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
/*
- * Mapping from pep2 to GGGTTTCCC in second new exon sequence
+ * Two mappings involve pep2 (dna to pep2, cds to pep2)
+ * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
*/
- List<AlignedCodonFrame> pep2Mapping = MappingUtils
+ List<AlignedCodonFrame> pep2Mappings = MappingUtils
.findMappingsForSequence(pep2, cdsMappings);
- assertEquals(1, pep2Mapping.size());
+ assertEquals(2, pep2Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep2Mappings);
+ assertEquals(1, mappings.size());
// map G to GGG
- sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
- sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
// map P to CCC
- sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
}
new DBRefEntry("EMBLCDS", "4", "A12347"));
/*
+ * Create the CDS alignment
+ */
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
+ dna.setDataset(null);
+
+ /*
* Make the mappings from dna to protein
*/
- List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
// map ...GGG...TTT to GF
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
// map aaa...ccc to KP
map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
// map aaa......TTT to KF
map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
- mappings.add(acf);
-
- /*
- * Create the CDS alignment; also augments the dna-to-protein mappings with
- * exon-to-protein and exon-to-dna mappings
- */
- AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
- dna.setDataset(null);
+ dna.addCodonFrame(acf);
/*
* execute method under test
*/
AlignmentI cdsal = AlignmentUtils.makeCdsAlignment(
- new SequenceI[] { dna1 }, mappings, dna);
+ new SequenceI[] { dna1 }, dna.getDataset());
/*
* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
* Verify there are mappings from each cds sequence to its protein product
* and also to its dna source
*/
- Iterator<AlignedCodonFrame> newMappingsIterator = cdsal
- .getCodonFrames().iterator();
+ List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
- // mappings for dna1 - exon1 - pep1
- AlignedCodonFrame cdsMapping = newMappingsIterator.next();
- List<Mapping> dnaMappings = cdsMapping.getMappingsFromSequence(dna1);
- assertEquals(3, dnaMappings.size());
- assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0)
- .getTo());
- assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
- .get(0).getMap().getToPosition(1));
- List<Mapping> peptideMappings = cdsMapping.getMappingsFromSequence(cds
- .get(0).getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
-
- // mappings for dna1 - cds2 - pep2
- assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1)
- .getTo());
- assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
- .get(1).getMap().getToPosition(4));
- peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(1)
- .getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
-
- // mappings for dna1 - cds3 - pep3
- assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2)
+ /*
+ * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
+ */
+ List<AlignedCodonFrame> dnaMappings = MappingUtils
+ .findMappingsForSequence(dna1, newMappings);
+ assertEquals(6, dnaMappings.size());
+
+ /*
+ * dna1 to pep1
+ */
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(pep1, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep1.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds1
+ */
+ List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(0), dnaMappings);
+ Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
+ .getMapping();
+ assertSame(cds.get(0).getDatasetSequence(), mapping
.getTo());
- assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
- .get(2).getMap().getToPosition(4));
- peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2)
- .getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
+ assertEquals("G(1) in CDS should map to G(4) in DNA", 4, mapping
+ .getMap().getToPosition(1));
+
+ /*
+ * dna1 to pep2
+ */
+ mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep2.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds2
+ */
+ List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(1), dnaMappings);
+ mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
+ assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
+ assertEquals("c(4) in CDS should map to c(7) in DNA", 7, mapping
+ .getMap().getToPosition(4));
+
+ /*
+ * dna1 to pep3
+ */
+ mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep3.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds3
+ */
+ List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(2), dnaMappings);
+ mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
+ assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
+ assertEquals("T(4) in CDS should map to T(10) in DNA", 10, mapping
+ .getMap().getToPosition(4));
}
@Test(groups = { "Functional" })
null));
dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
null));
+
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
+ dna.setDataset(null);
- List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
MapList map = new MapList(new int[] { 4, 12, 16, 18 },
new int[] { 1, 4 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
new int[] { 1, 3 },
3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
- AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
- dna.setDataset(null);
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2, dna3 }, mappings, dna);
+ dna1, dna2, dna3 }, dna.getDataset());
List<SequenceI> cdsSeqs = cds.getSequences();
assertEquals(2, cdsSeqs.size());
assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
.contains(cdsSeqs.get(1).getDatasetSequence()));
/*
- * Verify updated mappings
+ * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
+ * and the same for dna2/cds2/pep2
*/
- List<AlignedCodonFrame> cdsMappings = cds.getCodonFrames();
- assertEquals(2, cdsMappings.size());
+ List<AlignedCodonFrame> mappings = cds.getCodonFrames();
+ assertEquals(6, mappings.size());
/*
- * Mapping from pep1 to GGGTTT in first new CDS sequence
+ * 2 mappings involve pep1
*/
- List<AlignedCodonFrame> pep1Mapping = MappingUtils
- .findMappingsForSequence(pep1, cdsMappings);
- assertEquals(1, pep1Mapping.size());
+ List<AlignedCodonFrame> pep1Mappings = MappingUtils
+ .findMappingsForSequence(pep1, mappings);
+ assertEquals(2, pep1Mappings.size());
+
/*
+ * Get mapping of pep1 to cds1 and verify it
* maps GPFG to 1-3,4-6,7-9,10-12
*/
- SearchResults sr = MappingUtils
- .buildSearchResults(pep1, 1, cdsMappings);
+ List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
+ assertEquals(1, pep1CdsMappings.size());
+ SearchResults sr = MappingUtils.buildSearchResults(pep1, 1,
+ pep1CdsMappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(10, m.getStart());
assertEquals(12, m.getEnd());
/*
- * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
+ * Get mapping of pep2 to cds2 and verify it
+ * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
*/
- List<AlignedCodonFrame> pep2Mapping = MappingUtils
- .findMappingsForSequence(pep2, cdsMappings);
- assertEquals(1, pep2Mapping.size());
- sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings);
+ List<AlignedCodonFrame> pep2Mappings = MappingUtils
+ .findMappingsForSequence(pep2, mappings);
+ assertEquals(2, pep2Mappings.size());
+ List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
+ assertEquals(1, pep2CdsMappings.size());
+ sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
m = sr.getResults().get(0);
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
m = sr.getResults().get(0);
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
package jalview.analysis;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;
public class CrossRefTest
DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
+ // ENSEMBL is a source of either dna or protein sequence data
+ DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
- ref6, ref7, ref8 };
+ ref6, ref7, ref8, ref9 };
/*
* Just the DNA refs:
*/
- DBRefEntry[] found = CrossRef.findXDbRefs(false, refs);
- assertEquals(3, found.length);
+ DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
+ assertEquals(4, found.length);
assertSame(ref5, found[0]);
assertSame(ref6, found[1]);
assertSame(ref7, found[2]);
+ assertSame(ref9, found[3]);
/*
* Just the protein refs:
*/
- found = CrossRef.findXDbRefs(true, refs);
- assertEquals(4, found.length);
+ found = DBRefUtils.selectDbRefs(false, refs);
+ assertEquals(5, found.length);
assertSame(ref1, found[0]);
assertSame(ref2, found[1]);
assertSame(ref3, found[2]);
assertSame(ref4, found[3]);
+ assertSame(ref9, found[4]);
+ }
+
+ /**
+ * Test the method that finds a sequence's "product" xref source databases,
+ * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
+ * sequences which share a dbref with the sequence
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testFindXrefSourcesForSequence_proteinToDna()
+ {
+ SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
+ List<String> sources = new ArrayList<String>();
+ AlignmentI al = new Alignment(new SequenceI[] {});
+
+ /*
+ * first with no dbrefs to search
+ */
+ sources = new CrossRef(new SequenceI[] { seq }, al)
+ .findXrefSourcesForSequences(false);
+ assertTrue(sources.isEmpty());
+
+ /*
+ * add some dbrefs to sequence
+ */
+ // protein db is not a candidate for findXrefSources
+ seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ // dna coding databatases are
+ seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+ // a second EMBL xref should not result in a duplicate
+ seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
+ seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+ seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+ seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
+ seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
+ sources = new CrossRef(new SequenceI[] { seq }, al)
+ .findXrefSourcesForSequences(false);
+ assertEquals(4, sources.size());
+ assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString());
+
+ /*
+ * add a sequence to the alignment which has a dbref to UNIPROT|A1234
+ * and others to dna coding databases
+ */
+ sources.clear();
+ seq.setDBRefs(null);
+ seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+ SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
+ seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+ seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+ // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
+ al.addSequence(seq2);
+ sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
+ .findXrefSourcesForSequences(false);
+ assertEquals(3, sources.size());
+ assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the nucleotide sequence but on a peptide sequence in
+ * the alignment which which it shares a nucleotide dbref
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testFindXrefSequences_indirectDbrefToProtein()
+ {
+ /*
+ * Alignment setup:
+ * - nucleotide dbref EMBL|AF039662
+ * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * Find UNIPROT xrefs for nucleotide
+ * - it has no UNIPROT dbref of its own
+ * - but peptide with matching nucleotide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(uniprotSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the peptide sequence but on a nucleotide sequence in
+ * the alignment which which it shares a protein dbref
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testFindXrefSequences_indirectDbrefToNucleotide()
+ {
+ /*
+ * Alignment setup:
+ * - peptide dbref UNIPROT|Q9ZTS2
+ * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * find EMBL xrefs for peptide sequence - it has no direct
+ * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
+ */
+ /*
+ * Find EMBL xrefs for peptide
+ * - it has no EMBL dbref of its own
+ * - but nucleotide with matching peptide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al)
+ .findXrefSequences("EMBL", false);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(emblSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has no dbref to the desired source, and there are no indirect
+ * references via another sequence in the alignment
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_noDbrefs()
+ {
+ /*
+ * two nucleotide sequences, one with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
+
+ /*
+ * find UNIPROT xrefs for peptide sequence - it has no direct
+ * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
+ * equatable to it, so no results found
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
+ Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertNull(xrefs);
+ }
+
+ /**
+ * Tests for the method that searches an alignment (with one sequence
+ * excluded) for protein/nucleotide sequences with a given cross-reference
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testSearchDataset()
+ {
+ /*
+ * nucleotide sequence with UNIPROT AND EMBL dbref
+ * peptide sequence with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
+
+ List<SequenceI> result = new ArrayList<SequenceI>();
+
+ /*
+ * first search for a dbref nowhere on the alignment:
+ */
+ DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
+ CrossRef testee = new CrossRef(al.getSequencesArray(), al);
+ boolean found = testee.searchDataset(true, dna1, dbref, result, null,
+ true);
+ assertFalse(found);
+ assertTrue(result.isEmpty());
+
+ // TODO we are setting direct=true here but it is set to
+ // false in Jalview code...
+
+ /*
+ * search for a protein sequence with dbref UNIPROT:Q9ZTS2
+ */
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+ found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
+ null, false); // search dataset with a protein xref from a dna
+ // sequence to locate the protein product
+ assertTrue(found);
+ assertEquals(1, result.size());
+ assertSame(pep1, result.get(0));
+
+ /*
+ * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
+ */
+ result.clear();
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+ found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
+ null, false); // search dataset with a protein's direct dbref to
+ // locate dna sequences with matching xref
+ assertTrue(found);
+ assertEquals(1, result.size());
+ assertSame(dna1, result.get(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with a mapping to a sequence
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_fromDbRefMap()
+ {
+ /*
+ * two peptide sequences each with a DBRef and SequenceFeature
+ */
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
+ pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+ pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
+ "group"));
+ SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+ pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+ pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
+ 12f, "group2"));
+
+ /*
+ * nucleotide sequence (to go in the alignment)
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+
+ /*
+ * add DBRefEntry's to dna1 with mappings from dna to both peptides
+ */
+ MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+ 3, 1);
+ Mapping map = new Mapping(pep1, mapList);
+ DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+ dna1.addDBRef(dbRef1);
+ mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
+ map = new Mapping(pep2, mapList);
+ DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
+ dna1.addDBRef(dbRef2);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence - it should pick up
+ * mapped sequences
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertEquals(2, xrefs.getHeight());
+
+ /*
+ * cross-refs alignment holds copies of the mapped sequences
+ * including copies of their dbrefs and features
+ */
+ checkCopySequence(pep1, xrefs.getSequenceAt(0));
+ checkCopySequence(pep2, xrefs.getSequenceAt(1));
+ }
+
+ /**
+ * Helper method to assert seq1 looks like a copy of seq2
+ *
+ * @param seq1
+ * @param seq2
+ */
+ private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+ {
+ assertNotSame(seq1, seq2);
+ assertEquals(seq1.getName(), seq2.getName());
+ assertEquals(seq1.getStart(), seq2.getStart());
+ assertEquals(seq1.getEnd(), seq2.getEnd());
+ assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
+
+ /*
+ * compare dbrefs
+ */
+ assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
+ // check one to verify a copy, not the same object
+ if (seq1.getDBRefs().length > 0)
+ {
+ assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
+ }
+
+ /*
+ * compare features
+ */
+ assertArrayEquals(seq1.getSequenceFeatures(),
+ seq2.getSequenceFeatures());
+ if (seq1.getSequenceFeatures().length > 0)
+ {
+ assertNotSame(seq1.getSequenceFeatures()[0],
+ seq2.getSequenceFeatures()[0]);
+ }
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with no mapping, triggering a fetch from database
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_withFetch()
+ {
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+ final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+ final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+
+ /*
+ * argument false suppresses adding DAS sources
+ * todo: define an interface type SequenceFetcherI and mock that
+ */
+ SequenceFetcher mockFetcher = new SequenceFetcher(false)
+ {
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ return new SequenceI[] { pep1, pep2 };
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertEquals(2, xrefs.getHeight());
+ assertSame(pep1, xrefs.getSequenceAt(0));
+ assertSame(pep2, xrefs.getSequenceAt(1));
+ }
+
+ @AfterClass
+ public void tearDown()
+ {
+ SequenceFetcherFactory.setSequenceFetcher(null);
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where both gene and
+ * transcript sequences have dbrefs to Uniprot.
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_forGeneAndTranscripts()
+ {
+ /*
+ * 'gene' sequence
+ */
+ SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
+ gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
+ gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
+
+ /*
+ * 'transcript' with CDS feature (supports mapping to protein)
+ */
+ SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa");
+ braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
+ braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f,
+ null));
+
+ /*
+ * 'spliced transcript' with CDS ranges
+ */
+ SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa");
+ braf002.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
+ braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f,
+ null));
+ braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f,
+ null));
+
+ /*
+ * TODO code is fragile - use of SequenceIdMatcher depends on fetched
+ * sequences having a name starting Source|Accession
+ * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
+ */
+ final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
+ final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
+
+ /*
+ * argument false suppresses adding DAS sources
+ * todo: define an interface type SequenceFetcherI and mock that
+ */
+ SequenceFetcher mockFetcher = new SequenceFetcher(false)
+ {
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ return new SequenceI[] { pep1, pep2 };
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find UNIPROT xrefs for gene and transcripts
+ * verify that
+ * - the two proteins are retrieved but not duplicated
+ * - mappings are built from transcript (CDS) to proteins
+ * - no mappings from gene to proteins
+ */
+ SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
+ AlignmentI al = new Alignment(seqs);
+ Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
+ true);
+ assertEquals(2, xrefs.getHeight());
+ assertSame(pep1, xrefs.getSequenceAt(0));
+ assertSame(pep2, xrefs.getSequenceAt(1));
+ }
+
+ /**
+ * <pre>
+ * Test that emulates this (real but simplified) case:
+ * Alignment: DBrefs
+ * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
+ * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
+ * Find cross-references for EMBL. These are mocked here as
+ * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
+ * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
+ * EMBL|M19487 with mappings to P0CE19, Q46432
+ * EMBL|X07547 with mappings to P0CE20, B0BCM4
+ * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
+ * The 3 EMBL sequences are added to the alignment dataset.
+ * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
+ * alignment dataset and updated to reference the original Uniprot sequences.
+ * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
+ * resolved from the dataset, and only the X07547 dbref fetched.
+ * So the end state to verify is:
+ * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
+ * - P0CE19/20 dbrefs to EMBL sequences now have mappings
+ * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+ * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+ * </pre>
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_uniprotEmblManyToMany()
+ {
+ /*
+ * Uniprot sequences, both with xrefs to EMBL|J03321
+ * and EMBL|X07547
+ */
+ SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
+ SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
+
+ /*
+ * EMBL sequences to be 'fetched', complete with dbrefs and mappings
+ * to their protein products (CDS location and translations are provided
+ * in EMBL XML); these should be matched to, and replaced with,
+ * the corresponding uniprot sequences after fetching
+ */
+
+ /*
+ * J03321 with mappings to P0CE19 and P0CE20
+ */
+ final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
+ DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
+ 3, 1);
+ Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ mapList);
+ // add a dbref to the mapped to sequence - should get copied to p0ce19
+ map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
+ dbref1.setMap(map);
+ j03321.addDBRef(dbref1);
+ DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
+ dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
+ new MapList(mapList)));
+ j03321.addDBRef(dbref2);
+
+ /*
+ * X06707 with mappings to P0CE19 and P0CE20
+ */
+ final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
+ DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
+ 1);
+ dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
+ x06707.addDBRef(dbref3);
+ DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
+ 1);
+ dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
+ x06707.addDBRef(dbref4);
+
+ /*
+ * M19487 with mapping to P0CE19 and Q46432
+ */
+ final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
+ DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ new MapList(mapList)));
+ m19487.addDBRef(dbref5);
+ DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
+ dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
+ new MapList(mapList)));
+ m19487.addDBRef(dbref6);
+
+ /*
+ * X07547 with mapping to P0CE20 and B0BCM4
+ */
+ final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
+ DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ new MapList(map2)));
+ x07547.addDBRef(dbref7);
+ DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
+ dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
+ new MapList(map2)));
+ x07547.addDBRef(dbref8);
+
+ /*
+ * mock sequence fetcher to 'return' the EMBL sequences
+ * TODO: Mockito would allow .thenReturn().thenReturn() here,
+ * and also capture and verification of the parameters
+ * passed in calls to getSequences() - important to verify that
+ * duplicate sequence fetches are not requested
+ */
+ SequenceFetcher mockFetcher = new SequenceFetcher(false)
+ {
+ int call = 0;
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ call++;
+ if (call == 1)
+ {
+ assertEquals("Expected 3 embl seqs in first fetch", 3,
+ refs.size());
+ return new SequenceI[] { j03321, x06707, m19487 };
+ }
+ else
+ {
+ assertEquals("Expected 1 embl seq in second fetch", 1,
+ refs.size());
+ return new SequenceI[] { x07547 };
+ }
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find EMBL xrefs for Uniprot seqs and verify that
+ * - the EMBL xref'd sequences are retrieved without duplicates
+ * - mappings are added to the Uniprot dbrefs
+ * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
+ * alignment sequences
+ * - dbrefs on the EMBL sequences are added to the original dbrefs
+ */
+ SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
+ AlignmentI al = new Alignment(seqs);
+ Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
+ false);
+
+ /*
+ * verify retrieved sequences
+ */
+ assertNotNull(xrefs);
+ assertEquals(4, xrefs.getHeight());
+ assertSame(j03321, xrefs.getSequenceAt(0));
+ assertSame(x06707, xrefs.getSequenceAt(1));
+ assertSame(m19487, xrefs.getSequenceAt(2));
+ assertSame(x07547, xrefs.getSequenceAt(3));
+
+ /*
+ * verify mappings added to Uniprot-to-EMBL dbrefs
+ */
+ Mapping mapping = p0ce19.getDBRefs()[0].getMap();
+ assertSame(j03321, mapping.getTo());
+ mapping = p0ce19.getDBRefs()[1].getMap();
+ assertSame(x06707, mapping.getTo());
+ mapping = p0ce20.getDBRefs()[0].getMap();
+ assertSame(j03321, mapping.getTo());
+ mapping = p0ce20.getDBRefs()[1].getMap();
+ assertSame(x06707, mapping.getTo());
+
+ /*
+ * verify dbrefs on EMBL are mapped to alignment seqs
+ */
+ assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo());
+ assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo());
+ assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo());
+ assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo());
+
+ /*
+ * verify new dbref on EMBL dbref mapping is copied to the
+ * original Uniprot sequence
+ */
+ assertEquals(4, p0ce19.getDBRefs().length);
+ assertEquals("PIR", p0ce19.getDBRefs()[3].getSource());
+ assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId());
}
+ @Test(groups = "Functional")
+ public void testSameSequence()
+ {
+ assertTrue(CrossRef.sameSequence(null, null));
+ SequenceI seq1 = new Sequence("seq1", "ABCDEF");
+ assertFalse(CrossRef.sameSequence(seq1, null));
+ assertFalse(CrossRef.sameSequence(null, seq1));
+ assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
+ assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
+ assertFalse(CrossRef
+ .sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
+ assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));
+ }
}
* case insensitive matching
*/
assertTrue(testee.equals("a12345"));
+
+ testee = sequenceIdMatcher.new SeqIdName("UNIPROT|A12345");
+ assertFalse(testee.equals("A12345"));
+ assertFalse(testee.equals("UNIPROT|B98765"));
+ assertFalse(testee.equals("UNIPROT|"));
+ assertTrue(testee.equals("UNIPROT"));
}
}
assertArrayEquals(new int[] { 2, 2 },
acf.getMappedRegion(seq2, seq1, 6));
}
+
+ /**
+ * Tests for addMap. See also tests for MapList.addMapList
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMap()
+ {
+ final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
+ seq1.createDatasetSequence();
+ final Sequence aseq1 = new Sequence("Seq1", "-V-L");
+ aseq1.createDatasetSequence();
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
+ 1, 2 }, 3, 1);
+ acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+ assertEquals(1, acf.getMappingsFromSequence(seq1).size());
+ Mapping before = acf.getMappingsFromSequence(seq1).get(0);
+
+ /*
+ * add the same map again, verify it doesn't get duplicated
+ */
+ acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+ assertEquals(1, acf.getMappingsFromSequence(seq1).size());
+ assertSame(before, acf.getMappingsFromSequence(seq1).get(0));
+ }
}
// TODO should the copy constructor copy the dataset?
// or make a new one referring to the same dataset sequences??
assertNull(copy.getDataset());
+ // TODO test metadata is copied when AlignmentI is a dataset
+
// assertArrayEquals(copy.getDataset().getSequencesArray(), protein
// .getDataset().getSequencesArray());
}
// TODO promote this method to AlignmentI
((Alignment) protein).createDatasetAlignment();
- // TODO this method should return AlignmentI not Alignment !!
- Alignment ds = protein.getDataset();
+ AlignmentI ds = protein.getDataset();
// side-effect: dataset created on second sequence
assertNotNull(protein.getSequenceAt(1).getDatasetSequence());
assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
}
+ @Test(groups = ("Functional"))
+ public void testIsProtein()
+ {
+ // test Protein
+ assertTrue(new Sequence("prot","ASDFASDFASDF").isProtein());
+ // test DNA
+ assertFalse(new Sequence("prot","ACGTACGTACGT").isProtein());
+ // test RNA
+ SequenceI sq = new Sequence("prot","ACGUACGUACGU");
+ assertFalse(sq.isProtein());
+ // change sequence, should trigger an update of cached result
+ sq.setSequence("ASDFASDFADSF");
+ assertTrue(sq.isProtein());
+ }
@Test(groups = { "Functional" })
public void testGetAnnotation()
{
}
/**
+ * test createDatasetSequence behaves to doc
+ */
+ @Test(groups = { "Functional" })
+ public void testCreateDatasetSequence()
+ {
+ SequenceI sq = new Sequence("my","ASDASD");
+ assertNull(sq.getDatasetSequence());
+ SequenceI rds = sq.createDatasetSequence();
+ assertNotNull(rds);
+ assertNull(rds.getDatasetSequence());
+ assertEquals(sq.getDatasetSequence(), rds);
+ }
+
+ /**
* Test for deriveSequence applied to a sequence with a dataset
*/
@Test(groups = { "Functional" })
import jalview.io.FileLoader;
import jalview.io.FormatAdapter;
import jalview.structure.StructureSelectionManager;
+import jalview.util.MapList;
import java.util.ArrayList;
import java.util.List;
AlignFrame af1 = new FileLoader().LoadFileWaitTillLoaded(
">Seq1\nCAGT\n", FormatAdapter.PASTE);
+ SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0);
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 1, 4 },
+ 1, 1));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 4, 1 },
+ 1, 1));
List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
mappings.add(acf1);
">Seq1\nRSVQ\n", FormatAdapter.PASTE);
AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded(
">Seq2\nDGEL\n", FormatAdapter.PASTE);
-
+ SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA");
+ SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA");
+ SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0);
+ SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0);
+ // need to be distinct
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf3 = new AlignedCodonFrame();
+ acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1,
+ 12 }, 1, 1));
List<AlignedCodonFrame> mappings1 = new ArrayList<AlignedCodonFrame>();
mappings1.add(acf1);
">Seq1\nRSVQ\n", FormatAdapter.PASTE);
AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded(
">Seq2\nDGEL\n", FormatAdapter.PASTE);
-
+ SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA");
+ SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA");
+ SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0);
+ SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0);
+ // need to be distinct
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf3 = new AlignedCodonFrame();
+ acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1,
+ 12 }, 1, 1));
List<AlignedCodonFrame> mappings1 = new ArrayList<AlignedCodonFrame>();
mappings1.add(acf1);
public class UserColourSchemeTest
{
- @Test(groups = "functional")
+ @Test(groups = "Functional")
public void testGetColourFromString()
{
/*
import jalview.datamodel.SequenceI;
import jalview.io.FormatAdapter;
import jalview.io.StructureFile;
+import jalview.util.MapList;
import java.util.ArrayList;
import java.util.List;
public void testRegisterMapping()
{
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
ssm.registerMapping(acf1);
assertEquals(1, ssm.getSequenceMappings().size());
public void testRegisterMappings()
{
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
AlignedCodonFrame acf3 = new AlignedCodonFrame();
+ acf3.addMap(new Sequence("s3", "ttt"), new Sequence("p3", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
List<AlignedCodonFrame> set1 = new ArrayList<AlignedCodonFrame>();
set1.add(acf1);
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
+import java.util.List;
+
import org.testng.annotations.Test;
public class DBRefUtilsTest
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
1 }, 1, 1)));
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
ref2, ref3, ref4, ref5 }, target);
- assertEquals(3, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
- assertSame(ref5, matches[2]);
+ assertEquals(3, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref5, matches.get(2));
}
/**
new int[] { 1, 1 }, 2, 2));
ref3.setMap(map3);
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
ref2, ref3 }, target);
- assertEquals(2, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
+ assertEquals(2, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
}
/**
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
1 }, 1, 1)));
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
- ref2, ref3, ref4, ref5 }, "A1234");
- assertEquals(3, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
- assertSame(ref5, matches[2]);
+ DBRefEntry[] dbrefs = new DBRefEntry[] { ref1,
+ ref2, ref3, ref4, ref5 };
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(dbrefs, "A1234");
+ assertEquals(3, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref5, matches.get(2));
+ }
+
+ /**
+ * Test the method that searches for matches references - case when we are
+ * matching a reference with null (any) accession id
+ */
+ @Test(groups = { "Functional" })
+ public void testSearchRefs_wildcardAccessionid()
+ {
+ DBRefEntry target = new DBRefEntry("EMBL", "2", null);
+
+ DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches
+ // constructor changes embl to EMBL
+ DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1235"); // matches
+ // constructor does not upper-case accession id
+ DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1236"); // matches
+ DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match
+ // ref5 matches although it has a mapping - ignored
+ DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1237");
+ ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+ 1 }, 1, 1)));
+
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
+ ref2, ref3, ref4, ref5 }, target);
+ assertEquals(4, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref3, matches.get(2));
+ assertSame(ref5, matches.get(3));
}
}
s);
}
+ /**
+ * Test that confirms adding a map twice does nothing
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMapList_sameMap()
+ {
+ MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 },
+ new int[] { 72, 22 }, 1, 3);
+ String before = ml.toString();
+ ml.addMapList(ml);
+ assertEquals(before, ml.toString());
+ ml.addMapList(new MapList(ml));
+ assertEquals(before, ml.toString());
+ }
+
@Test(groups = { "Functional" })
public void testAddMapList_contiguous()
{
package jalview.ws;
+import jalview.analysis.CrossRef;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
// TODO: extracted from SequenceFetcher - convert to proper unit test with
// assertions
- AlignmentI ds = null;
- Vector<Object[]> noProds = new Vector<Object[]>();
String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
+ "With no arguments, all DbSources will be queried with their test Accession number.\n"
+ "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
{
List<DbSourceProxy> sps = new SequenceFetcher(withDas)
.getSourceProxy(argv[0]);
-
+
if (sps != null)
{
for (DbSourceProxy sp : sps)
AlignmentI al = null;
try
{
- al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
+ testRetrieval(argv[0], sp,
+ argv.length > 1 ? argv[1] : sp
.getTestQuery());
} catch (Exception e)
{
+ (argv.length > 1 ? argv[1] : sp.getTestQuery())
+ " from " + argv[0] + "\nUsage: " + usage);
}
- SequenceI[] prod = al.getSequencesArray();
- if (al != null)
- {
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true) + " : "
- + prod[p].getDescription());
- }
- }
}
return;
}
}
for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
{
- System.out.println("Source: " + sp.getDbName() + " (" + db
- + "): retrieving test:" + sp.getTestQuery());
- AlignmentI al = null;
- try
+ testRetrieval(db, sp, sp.getTestQuery());
+ }
+ }
+
+ }
+
+ private static void testRetrieval(String db, DbSourceProxy sp,
+ String testQuery)
+ {
+ AlignmentI ds = null;
+ Vector<Object[]> noProds = new Vector<Object[]>();
+ System.out.println("Source: " + sp.getDbName() + " (" + db
+ + "): retrieving test:" + sp.getTestQuery());
+ {
+ AlignmentI al = null;
+ try
+ {
+ al = sp.getSequenceRecords(testQuery);
+ if (al != null && al.getHeight() > 0)
{
- al = sp.getSequenceRecords(sp.getTestQuery());
- if (al != null && al.getHeight() > 0)
+ boolean dna = sp.isDnaCoding();
+ al.setDataset(null);
+ AlignmentI alds = al.getDataset();
+ // try and find products
+ CrossRef crossRef = new CrossRef(al.getSequencesArray(), alds);
+ List<String> types = crossRef.findXrefSourcesForSequences(dna);
+ if (types != null)
{
- boolean dna = sp.isDnaCoding();
- // try and find products
- String types[] = jalview.analysis.CrossRef
- .findSequenceXrefTypes(dna, al.getSequencesArray());
- if (types != null)
+ System.out.println("Xref Types for: " + (dna ? "dna" : "prot"));
+ for (String source : types)
{
- System.out.println("Xref Types for: "
- + (dna ? "dna" : "prot"));
- for (int t = 0; t < types.length; t++)
+ System.out.println("Type: " + source);
+ SequenceI[] prod = crossRef.findXrefSequences(source, dna)
+ .getSequencesArray();
+ System.out.println("Found "
+ + ((prod == null) ? "no" : "" + prod.length)
+ + " products");
+ if (prod != null)
{
- System.out.println("Type: " + types[t]);
- SequenceI[] prod = jalview.analysis.CrossRef
- .findXrefSequences(al.getSequencesArray(), dna,
- types[t], null)
- .getSequencesArray();
- System.out.println("Found "
- + ((prod == null) ? "no" : "" + prod.length)
- + " products");
- if (prod != null)
+ for (int p = 0; p < prod.length; p++)
{
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true));
- }
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true));
}
}
}
- else
- {
- noProds.addElement((dna ? new Object[] { al, al }
- : new Object[] { al }));
- }
-
- }
- } catch (Exception ex)
- {
- System.out.println("ERROR:Failed to retrieve test query.");
- ex.printStackTrace(System.out);
- }
-
- if (al == null)
- {
- System.out.println("ERROR:No alignment retrieved.");
- StringBuffer raw = sp.getRawRecords();
- if (raw != null)
- {
- System.out.println(raw.toString());
}
else
{
- System.out.println("ERROR:No Raw results.");
+ noProds.addElement((dna ? new Object[] { al, al }
+ : new Object[] { al }));
}
+
+ }
+ } catch (Exception ex)
+ {
+ System.out.println("ERROR:Failed to retrieve test query.");
+ ex.printStackTrace(System.out);
+ }
+
+ if (al == null)
+ {
+ System.out.println("ERROR:No alignment retrieved.");
+ StringBuffer raw = sp.getRawRecords();
+ if (raw != null)
+ {
+ System.out.println(raw.toString());
}
else
{
- System.out.println("Retrieved " + al.getHeight() + " sequences.");
- for (int s = 0; s < al.getHeight(); s++)
- {
- SequenceI sq = al.getSequenceAt(s);
- while (sq.getDatasetSequence() != null)
- {
- sq = sq.getDatasetSequence();
-
- }
- if (ds == null)
- {
- ds = new Alignment(new SequenceI[] { sq });
-
- }
- else
- {
- ds.addSequence(sq);
- }
- }
+ System.out.println("ERROR:No Raw results.");
+ }
+ }
+ else
+ {
+ System.out.println("Retrieved " + al.getHeight() + " sequences.");
+ if (ds == null)
+ {
+ ds = al.getDataset();
+ }
+ else
+ {
+ ds.append(al.getDataset());
+ al.setDataset(ds);
}
- System.out.flush();
- System.err.flush();
-
}
- if (noProds.size() > 0)
+ System.out.flush();
+ System.err.flush();
+ }
+ if (noProds.size() > 0)
+ {
+ Enumeration<Object[]> ts = noProds.elements();
+ while (ts.hasMoreElements())
+
{
- Enumeration<Object[]> ts = noProds.elements();
- while (ts.hasMoreElements())
-
+ Object[] typeSq = ts.nextElement();
+ boolean dna = (typeSq.length > 1);
+ AlignmentI al = (AlignmentI) typeSq[0];
+ System.out.println("Trying getProducts for "
+ + al.getSequenceAt(0).getDisplayId(true));
+ System.out.println("Search DS Xref for: " + (dna ? "dna" : "prot"));
+ // have a bash at finding the products amongst all the retrieved
+ // sequences.
+ SequenceI[] seqs = al.getSequencesArray();
+ Alignment prodal = new CrossRef(seqs, ds).findXrefSequences(null,
+ dna);
+ System.out.println("Found "
+ + ((prodal == null) ? "no" : "" + prodal.getHeight())
+ + " products");
+ if (prodal != null)
{
- Object[] typeSq = ts.nextElement();
- boolean dna = (typeSq.length > 1);
- AlignmentI al = (AlignmentI) typeSq[0];
- System.out.println("Trying getProducts for "
- + al.getSequenceAt(0).getDisplayId(true));
- System.out.println("Search DS Xref for: "
- + (dna ? "dna" : "prot"));
- // have a bash at finding the products amongst all the retrieved
- // sequences.
- SequenceI[] seqs = al.getSequencesArray();
- Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
- seqs, dna, null, ds);
- System.out.println("Found "
- + ((prodal == null) ? "no" : "" + prodal.getHeight())
- + " products");
- if (prodal != null)
+ SequenceI[] prod = prodal.getSequencesArray(); // note
+ // should
+ // test
+ // rather
+ // than
+ // throw
+ // away
+ // codon
+ // mapping
+ // (if
+ // present)
+ for (int p = 0; p < prod.length; p++)
{
- SequenceI[] prod = prodal.getSequencesArray(); // note
- // should
- // test
- // rather
- // than
- // throw
- // away
- // codon
- // mapping
- // (if
- // present)
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true));
- }
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true));
}
}
-
}
-
}
}
-
}
.getMap().getMappedWidth(), 1);
assertEquals("Expected local reference map to be 3 nucleotides", dr[0]
.getMap().getWidth(), 3);
- AlignmentI sprods = CrossRef.findXrefSequences(
- alsq.getSequencesArray(), true, dr[0].getSource(), alsq);
+ AlignmentI sprods = new CrossRef(alsq.getSequencesArray(), alsq)
+ .findXrefSequences(dr[0].getSource(), true);
assertNotNull(
"Couldn't recover cross reference sequence from dataset. Was it ever added ?",
sprods);