import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
+import jalview.api.DBRefEntryI;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;
*/
public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)
{
+ if (protein.isNucleotide() || !dna.isNucleotide())
+ {
+ System.err.println("Wrong alignment type in alignProteinAsDna");
+ return 0;
+ }
List<SequenceI> unmappedProtein = new ArrayList<SequenceI>();
Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(
protein, dna, unmappedProtein);
}
/**
+ * Realigns the given dna to match the alignment of the protein, using codon
+ * mappings to translate aligned peptide positions to codons.
+ *
+ * Always produces a padded CDS alignment.
+ *
+ * @param dna
+ * the alignment whose sequences are realigned by this method
+ * @param protein
+ * the protein alignment whose alignment we are 'copying'
+ * @return the number of sequences that were realigned
+ */
+ public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)
+ {
+   /*
+    * this method needs a nucleotide alignment to realign and a protein
+    * alignment to copy from; anything else is reported and ignored
+    */
+   if (protein.isNucleotide() || !dna.isNucleotide())
+   {
+     System.err.println("Wrong alignment type in alignCdsAsProtein");
+     return 0;
+   }
+   List<AlignedCodonFrame> mappings = protein.getCodonFrames();
+   char gapChar = dna.getGapCharacter();
+   int alignedCount = 0;
+   int width = 0; // alignment width for padding CDS
+
+   /*
+    * first pass - realign each sequence that has a usable mapping, and
+    * record the widest resulting sequence
+    */
+   for (SequenceI dnaSeq : dna.getSequences())
+   {
+     if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings, gapChar))
+     {
+       alignedCount++;
+     }
+     width = Math.max(dnaSeq.getLength(), width);
+   }
+
+   /*
+    * second pass - right-pad shorter sequences with gaps so that all
+    * sequences have the same width
+    */
+   for (SequenceI dnaSeq : dna.getSequences())
+   {
+     int oldWidth = dnaSeq.getLength();
+     int diff = width - oldWidth;
+     if (diff > 0)
+     {
+       dnaSeq.insertCharAt(oldWidth, diff, gapChar);
+     }
+   }
+   return alignedCount;
+ }
+
+ /**
+  * Helper method to align (if possible) the dna sequence to match the
+  * alignment of a mapped protein sequence. This is currently limited to
+  * handling coding sequence only.
+  * 
+  * @param cdsSeq
+  *          the nucleotide sequence to realign; it must have a dataset
+  *          sequence
+  * @param protein
+  *          the protein alignment searched for the peptide mapped to cdsSeq
+  * @param mappings
+  *          the codon frame mappings to search for ones involving cdsSeq
+  * @param gapChar
+  *          the character used to pre-fill the realigned sequence
+  * @return true if cdsSeq was given a new aligned sequence, else false
+  */
+ static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,
+         AlignmentI protein, List<AlignedCodonFrame> mappings, char gapChar)
+ {
+   SequenceI cdsDss = cdsSeq.getDatasetSequence();
+   if (cdsDss == null)
+   {
+     System.err
+             .println("alignCdsSequenceAsProtein needs aligned sequence!");
+     return false;
+   }
+
+   List<AlignedCodonFrame> dnaMappings = MappingUtils
+           .findMappingsForSequence(cdsSeq, mappings);
+   for (AlignedCodonFrame mapping : dnaMappings)
+   {
+     SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
+     if (peptide == null)
+     {
+       // no aligned peptide for this mapping - try the next one
+       // (the null check must precede any use of peptide)
+       continue;
+     }
+     Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
+     if (map == null)
+     {
+       continue;
+     }
+     int peptideLength = peptide.getLength();
+
+     /*
+      * mapList.locateInTo() is called below with a peptide position,
+      * so invert the map if its 'to' sequence is the peptide
+      */
+     MapList mapList = map.getMap();
+     if (map.getTo() == peptide.getDatasetSequence())
+     {
+       mapList = mapList.getInverse();
+     }
+     int cdsLength = cdsDss.getLength();
+     int mappedFromLength = MappingUtils.getLength(mapList
+             .getFromRanges());
+     int mappedToLength = MappingUtils.getLength(mapList.getToRanges());
+     boolean addStopCodon = (cdsLength == mappedFromLength * 3 + 3)
+             || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1);
+     if (cdsLength != mappedToLength && !addStopCodon)
+     {
+       // reported only; realignment is still attempted
+       System.err
+               .println(String
+                       .format("Can't align cds as protein (length mismatch %d/%d): %s",
+                               cdsLength, mappedToLength,
+                               cdsSeq.getName()));
+     }
+
+     /*
+      * pre-fill the aligned cds sequence with gaps
+      */
+     char[] alignedCds = new char[peptideLength * 3
+             + (addStopCodon ? 3 : 0)];
+     Arrays.fill(alignedCds, gapChar);
+
+     /*
+      * walk over the aligned peptide sequence and insert mapped
+      * codons for residues in the aligned cds sequence
+      */
+     char[] alignedPeptide = peptide.getSequence();
+     char[] nucleotides = cdsDss.getSequence();
+     int copiedBases = 0;
+     int cdsStart = cdsDss.getStart();
+     int proteinPos = peptide.getStart() - 1;
+     int cdsCol = 0;
+     for (char residue : alignedPeptide)
+     {
+       if (Comparison.isGap(residue))
+       {
+         cdsCol += 3;
+       }
+       else
+       {
+         proteinPos++;
+         int[] codon = mapList.locateInTo(proteinPos, proteinPos);
+         if (codon == null)
+         {
+           // e.g. incomplete start codon, X in peptide
+           cdsCol += 3;
+         }
+         else
+         {
+           for (int j = codon[0]; j <= codon[1]; j++)
+           {
+             char mappedBase = nucleotides[j - cdsStart];
+             alignedCds[cdsCol++] = mappedBase;
+             copiedBases++;
+           }
+         }
+       }
+     }
+
+     /*
+      * append stop codon if not mapped from protein,
+      * closing it up to the end of the mapped sequence
+      */
+     if (copiedBases == nucleotides.length - 3)
+     {
+       for (int i = alignedCds.length - 1; i >= 0; i--)
+       {
+         if (!Comparison.isGap(alignedCds[i]))
+         {
+           cdsCol = i + 1; // gap just after end of sequence
+           break;
+         }
+       }
+       for (int i = nucleotides.length - 3; i < nucleotides.length; i++)
+       {
+         alignedCds[cdsCol++] = nucleotides[i];
+       }
+     }
+     cdsSeq.setSequence(new String(alignedCds));
+     return true;
+   }
+   return false;
+ }
+
+ /**
* Builds a map whose key is an aligned codon position (3 alignment column
* numbers base 0), and whose value is a map from protein sequence to each
* protein's peptide residue for that codon. The map generates an ordering of
* added to the alignment dataset.
*
* @param dna
- * aligned dna sequences
- * @param mappings
- * from dna to protein
- * @param al
+ * aligned nucleotide (dna or cds) sequences
+ * @param dataset
+ * the alignment dataset the sequences belong to
+ * @param products
+ * (optional) to restrict results to CDS that map to specified
+ * protein products
* @return an alignment whose sequences are the cds-only parts of the dna
* sequences (or null if no mappings are found)
*/
public static AlignmentI makeCdsAlignment(SequenceI[] dna,
- List<AlignedCodonFrame> mappings, AlignmentI al)
+ AlignmentI dataset, SequenceI[] products)
{
+ if (dataset == null || dataset.getDataset() != null)
+ {
+ throw new IllegalArgumentException(
+ "IMPLEMENTATION ERROR: dataset.getDataset() must be null!");
+ }
+ List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
List<SequenceI> cdsSeqs = new ArrayList<SequenceI>();
-
- for (SequenceI seq : dna)
+ List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
+ HashSet<SequenceI> productSeqs = null;
+ if (products != null)
+ {
+ productSeqs = new HashSet<SequenceI>();
+ for (SequenceI seq : products)
+ {
+ productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence());
+ }
+ }
+
+ /*
+ * Construct CDS sequences from mappings on the alignment dataset.
+ * The logic is:
+ * - find the protein product(s) mapped to from each dna sequence
+ * - if the mapping covers the whole dna sequence (give or take start/stop
+ * codon), take the dna as the CDS sequence
+ * - else search dataset mappings for a suitable dna sequence, i.e. one
+ * whose whole sequence is mapped to the protein
+ * - if no sequence found, construct one from the dna sequence and mapping
+ * (and add it to dataset so it is found if this is repeated)
+ */
+ for (SequenceI dnaSeq : dna)
{
- AlignedCodonFrame cdsMappings = new AlignedCodonFrame();
+ SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq
+ : dnaSeq.getDatasetSequence();
+
List<AlignedCodonFrame> seqMappings = MappingUtils
- .findMappingsForSequence(seq, mappings);
- List<AlignedCodonFrame> alignmentMappings = al.getCodonFrames();
+ .findMappingsForSequence(dnaSeq, mappings);
for (AlignedCodonFrame mapping : seqMappings)
{
- for (Mapping aMapping : mapping.getMappingsFromSequence(seq))
+ List<Mapping> mappingsFromSequence = mapping
+ .getMappingsFromSequence(dnaSeq);
+
+ for (Mapping aMapping : mappingsFromSequence)
{
- SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(),
- aMapping);
+ MapList mapList = aMapping.getMap();
+ if (mapList.getFromRatio() == 1)
+ {
+ /*
+ * not a dna-to-protein mapping (likely dna-to-cds)
+ */
+ continue;
+ }
+
+ /*
+ * skip if mapping is not to one of the target set of proteins
+ */
+ SequenceI proteinProduct = aMapping.getTo();
+ if (productSeqs != null && !productSeqs.contains(proteinProduct))
+ {
+ continue;
+ }
+
+ /*
+ * try to locate the CDS from the dataset mappings;
+ * guard against duplicate results (for the case that protein has
+ * dbrefs to both dna and cds sequences)
+ */
+ SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,
+ seqMappings, aMapping);
+ if (cdsSeq != null)
+ {
+ if (!foundSeqs.contains(cdsSeq))
+ {
+ foundSeqs.add(cdsSeq);
+ SequenceI derivedSequence = cdsSeq.deriveSequence();
+ cdsSeqs.add(derivedSequence);
+ if (!dataset.getSequences().contains(cdsSeq))
+ {
+ dataset.addSequence(cdsSeq);
+ }
+ }
+ continue;
+ }
+
+ /*
+ * didn't find mapped CDS sequence - construct it and add
+ * its dataset sequence to the dataset
+ */
+ cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
+ SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
cdsSeqs.add(cdsSeq);
-
+ if (!dataset.getSequences().contains(cdsSeqDss))
+ {
+ dataset.addSequence(cdsSeqDss);
+ }
+
/*
* add a mapping from CDS to the (unchanged) mapped to range
*/
List<int[]> cdsRange = Collections.singletonList(new int[] { 1,
cdsSeq.getLength() });
- MapList map = new MapList(cdsRange, aMapping.getMap()
- .getToRanges(), aMapping.getMap().getFromRatio(),
- aMapping.getMap().getToRatio());
- cdsMappings.addMap(cdsSeq, aMapping.getTo(), map);
+ MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(),
+ mapList.getFromRatio(), mapList.getToRatio());
+ AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+ cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap);
+
+ /*
+ * guard against duplicating the mapping if repeating this action
+ */
+ if (!mappings.contains(cdsToProteinMapping))
+ {
+ mappings.add(cdsToProteinMapping);
+ }
+
+ /*
+ * copy protein's dbrefs to CDS sequence
+ * this enables Get Cross-References from CDS alignment
+ */
+ DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false,
+ proteinProduct.getDBRefs());
+ if (proteinRefs != null)
+ {
+ for (DBRefEntry ref : proteinRefs)
+ {
+ DBRefEntry cdsToProteinRef = new DBRefEntry(ref);
+ cdsToProteinRef.setMap(new Mapping(proteinProduct,
+ cdsToProteinMap));
+ cdsSeqDss.addDBRef(cdsToProteinRef);
+ }
+ }
/*
* add another mapping from original 'from' range to CDS
*/
- map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1,
+ AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();
+ MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
+ cdsRange, 1,
1);
- cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map);
+ dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq,
+ dnaToCdsMap);
+ if (!mappings.contains(dnaToCdsMapping))
+ {
+ mappings.add(dnaToCdsMapping);
+ }
- alignmentMappings.add(cdsMappings);
+ /*
+ * add DBRef with mapping from protein to CDS
+ * (this enables Get Cross-References from protein alignment)
+ * This is tricky because we can't have two DBRefs with the
+ * same source and accession, so need a different accession for
+ * the CDS from the dna sequence
+ */
+ DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
+ if (dnaRef != null)
+ {
+ // assuming cds version same as dna ?!?
+ DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
+ dnaRef.getVersion(), cdsSeq.getName());
+ proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
+ .getInverse()));
+ proteinProduct.addDBRef(proteinToCdsRef);
+ }
/*
* transfer any features on dna that overlap the CDS
+ * (the dna-to-CDS map is passed here, as the features are being
+ * moved from dna positions to CDS positions, not to protein)
*/
- transferFeatures(seq, cdsSeq, map, null, SequenceOntologyI.CDS);
+ transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,
+ SequenceOntologyI.CDS);
}
}
}
+ AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
+ .size()]));
+ cds.setDataset(dataset);
+
+ return cds;
+ }
+
+ /**
+ * A helper method that finds a CDS sequence in the alignment dataset that is
+ * mapped to the given protein sequence, and either is, or has a mapping from,
+ * the given dna sequence.
+ *
+ * @param mappings
+ * set of all mappings on the dataset
+ * @param dnaSeq
+ * a dna (or cds) sequence we are searching from
+ * @param seqMappings
+ * the set of mappings involving dnaSeq
+ * @param aMapping
+ * an initial candidate from seqMappings
+ * @return the matching sequence: either the dataset sequence of dnaSeq (or
+ * dnaSeq itself if it has none), or the 'from' sequence of a
+ * whole-length 3:1 mapping to the protein; null if none is found
+ */
+ static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,
+ SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,
+ Mapping aMapping)
+ {
+ /*
+ * TODO a better dna-cds-protein mapping data representation to allow easy
+ * navigation; until then this clunky looping around lists of mappings
+ */
+ SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq
+ : dnaSeq.getDatasetSequence();
+ SequenceI proteinProduct = aMapping.getTo();
+
+ /*
+ * is this mapping from the whole dna sequence (i.e. CDS)?
+ * allowing for possible stop codon on dna but not peptide
+ * (so the mapped length may equal the dna length, or be 3 less)
+ */
+ int mappedFromLength = MappingUtils.getLength(aMapping.getMap()
+ .getFromRanges());
+ int dnaLength = seqDss.getLength();
+ if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - 3)
+ {
+ return seqDss;
+ }
+
/*
- * add CDS seqs to shared dataset
+ * looks like we found the dna-to-protein mapping; search for the
+ * corresponding cds-to-protein mapping
+ * (candidates: a different mapping, with 'from' ratio 3, to the same
+ * protein product, from a sequence other than the starting one)
*/
- Alignment dataset = al.getDataset();
- for (SequenceI seq : cdsSeqs)
+ List<AlignedCodonFrame> mappingsToPeptide = MappingUtils
+ .findMappingsForSequence(proteinProduct, mappings);
+ for (AlignedCodonFrame acf : mappingsToPeptide)
{
- if (!dataset.getSequences().contains(seq.getDatasetSequence()))
+ for (SequenceToSequenceMapping map : acf.getMappings())
{
- dataset.addSequence(seq.getDatasetSequence());
+ Mapping mapping = map.getMapping();
+ if (mapping != aMapping && mapping.getMap().getFromRatio() == 3
+ && proteinProduct == mapping.getTo()
+ && seqDss != map.getFromSeq())
+ {
+ mappedFromLength = MappingUtils.getLength(mapping.getMap()
+ .getFromRanges());
+ if (mappedFromLength == map.getFromSeq().getLength())
+ {
+ /*
+ * found a 3:1 mapping to the protein product which covers
+ * the whole dna sequence i.e. is from CDS; finally check it
+ * is from the dna start sequence
+ * (accepted if any of seqMappings is returned for the candidate
+ * by MappingUtils.findMappingsForSequence)
+ */
+ SequenceI cdsSeq = map.getFromSeq();
+ List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils
+ .findMappingsForSequence(cdsSeq, seqMappings);
+ if (!dnaToCdsMaps.isEmpty())
+ {
+ return cdsSeq;
+ }
+ }
+ }
}
}
- AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
- .size()]));
- cds.setDataset(dataset);
-
- return cds;
+ return null;
}
/**
*
* @param seq
* @param mapping
- * @return
+ * @return CDS sequence (as a dataset sequence)
*/
static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
{
}
}
- SequenceI newSeq = new Sequence(seq.getName() + "|"
- + mapping.getTo().getName(), newSeqChars, 1, newPos);
- newSeq.createDatasetSequence();
+ /*
+ * assign 'from id' held in the mapping if set (e.g. EMBL protein_id),
+ * else generate a sequence name
+ */
+ String mapFromId = mapping.getMappedFromId();
+ String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName());
+ SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos);
+ // newSeq.setDescription(mapFromId);
+
return newSeq;
}
* sort to get sequence features in start position order
* - would be better to store in Sequence as a TreeSet or NCList?
*/
- Arrays.sort(peptide.getSequenceFeatures(),
- new Comparator<SequenceFeature>()
- {
- @Override
- public int compare(SequenceFeature o1, SequenceFeature o2)
+ if (peptide.getSequenceFeatures() != null)
+ {
+ Arrays.sort(peptide.getSequenceFeatures(),
+ new Comparator<SequenceFeature>()
{
- int c = Integer.compare(o1.getBegin(), o2.getBegin());
- return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
- : c;
- }
- });
+ @Override
+ public int compare(SequenceFeature o1, SequenceFeature o2)
+ {
+ int c = Integer.compare(o1.getBegin(), o2.getBegin());
+ return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
+ : c;
+ }
+ });
+ }
return count;
}
*
* @param seqs
* @param xrefs
+ * @param dataset
+ * the alignment dataset shared by the new copy
* @return
*/
public static AlignmentI makeCopyAlignment(SequenceI[] seqs,
- SequenceI[] xrefs)
+ SequenceI[] xrefs, AlignmentI dataset)
{
AlignmentI copy = new Alignment(new Alignment(seqs));
-
- /*
- * add mappings between sequences to the new alignment
- */
- AlignedCodonFrame mappings = new AlignedCodonFrame();
- copy.addCodonFrame(mappings);
- for (int i = 0; i < copy.getHeight(); i++)
- {
- SequenceI from = seqs[i];
- SequenceI to = copy.getSequenceAt(i);
- if (to.getDatasetSequence() != null)
- {
- to = to.getDatasetSequence();
- }
- int start = from.getStart();
- int end = from.getEnd();
- MapList map = new MapList(new int[] { start, end }, new int[] {
- start, end }, 1, 1);
- mappings.addMap(to, from, map);
- }
+ copy.setDataset(dataset);
SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
if (xrefs != null)
*/
public static int alignAs(AlignmentI unaligned, AlignmentI aligned)
{
+ /*
+ * easy case - aligning a copy of aligned sequences
+ */
+ if (alignAsSameSequences(unaligned, aligned))
+ {
+ return unaligned.getHeight();
+ }
+
+ /*
+ * fancy case - aligning via mappings between sequences
+ */
List<SequenceI> unmapped = new ArrayList<SequenceI>();
Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(
unaligned, aligned, unmapped);
int width = columnMap.size();
char gap = unaligned.getGapCharacter();
int realignedCount = 0;
+ // TODO: verify this loop scales sensibly for very wide/high alignments
for (SequenceI seq : unaligned.getSequences())
{
if (!unmapped.contains(seq))
{
char[] newSeq = new char[width];
- Arrays.fill(newSeq, gap);
+ Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the
+ // Integer iteration below
int newCol = 0;
int lastCol = 0;
System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);
newSeq = tmp;
}
+ // TODO: optimise SequenceI to avoid char[]->String->char[]
seq.setSequence(String.valueOf(newSeq));
realignedCount++;
}
}
/**
+ * If unaligned and aligned sequences share the same dataset sequences, then
+ * simply copies the aligned sequences to the unaligned sequences and returns
+ * true; else returns false
+ *
+ * @param unaligned
+ * - sequences to be aligned based on aligned
+ * @param aligned
+ * - 'guide' alignment containing sequences derived from same dataset
+ * as unaligned
+ * @return
+ */
+ static boolean alignAsSameSequences(AlignmentI unaligned,
+ AlignmentI aligned)
+ {
+ if (aligned.getDataset() == null || unaligned.getDataset() == null)
+ {
+ return false; // should only pass alignments with datasets here
+ }
+
+ // map from dataset sequence to alignment sequence
+ Map<SequenceI, SequenceI> alignedDatasets = new HashMap<SequenceI, SequenceI>();
+ for (SequenceI seq : aligned.getSequences())
+ {
+ // JAL-2110: fail if two or more alignment sequences have a common dataset
+ // sequence.
+ alignedDatasets.put(seq.getDatasetSequence(), seq);
+ }
+
+ /*
+ * first pass - check whether all sequences to be aligned share a dataset
+ * sequence with an aligned sequence
+ */
+ for (SequenceI seq : unaligned.getSequences())
+ {
+ if (!alignedDatasets.containsKey(seq.getDatasetSequence()))
+ {
+ return false;
+ }
+ }
+
+ /*
+ * second pass - copy aligned sequences
+ */
+ for (SequenceI seq : unaligned.getSequences())
+ {
+ SequenceI alignedSequence = alignedDatasets.get(seq
+ .getDatasetSequence());
+ // JAL-2110: fail if two or more alignment sequences have common dataset
+ // sequence.
+ // TODO: getSequenceAsString() will be deprecated in the future
+ // TODO: need to leave to SequenceI implementor to update gaps
+ seq.setSequence(alignedSequence.getSequenceAsString());
+ }
+
+ return true;
+ }
+
+ /**
* Returns a map whose key is alignment column number (base 1), and whose
* values are a map of sequence characters in that column.
*
{
/*
* Map will hold, for each aligned column position, a map of
- * {unalignedSequence, sequenceCharacter} at that position.
+ * {unalignedSequence, characterPerSequence} at that position.
* TreeMap keeps the entries in ascending column order.
*/
Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
/*
- * r any sequences that have no mapping so can't be realigned
+ * record any sequences that have no mapping so can't be realigned
*/
unmapped.addAll(unaligned.getSequences());
return false;
}
+ /*
+ * invert mapping if it is from unaligned to aligned sequence
+ */
+ if (seqMap.getTo() == fromSeq.getDatasetSequence())
+ {
+ seqMap = new Mapping(seq.getDatasetSequence(), seqMap.getMap()
+ .getInverse());
+ }
+
char[] fromChars = fromSeq.getSequence();
int toStart = seq.getStart();
char[] toChars = seq.getSequence();
* of the next character of the mapped-to sequence; stop when all
* the characters of the range have been counted
*/
- while (mappedCharPos <= range[1])
+ while (mappedCharPos <= range[1] && fromCol <= fromChars.length
+ && fromCol >= 0)
{
if (!Comparison.isGap(fromChars[fromCol - 1]))
{
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
-import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
import jalview.ws.seqfetcher.ASequenceFetcher;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
-import java.util.Vector;
/**
- * Functions for cross-referencing sequence databases. user must first specify
- * if cross-referencing from protein or dna (set dna==true)
+ * Functions for cross-referencing sequence databases.
*
* @author JimP
*
public class CrossRef
{
/*
- * A sub-class that ignores Parent attribute when comparing sequence
- * features. This avoids 'duplicate' CDS features that only
- * differ in their parent Transcript ids.
+ * the dataset of the alignment for which we are searching for
+ * cross-references; in some cases we may resolve xrefs by
+ * searching in the dataset
*/
- class MySequenceFeature extends SequenceFeature
- {
- private SequenceFeature feat;
+ private AlignmentI dataset;
- MySequenceFeature(SequenceFeature sf)
- {
- this.feat = sf;
- }
+ /*
+ * the sequences for which we are seeking cross-references
+ */
+ private SequenceI[] fromSeqs;
- @Override
- public boolean equals(Object o)
- {
- return feat.equals(o, true);
- }
- }
+ /**
+ * matcher built from dataset
+ */
+ SequenceIdMatcher matcher;
/**
- * Select just the DNA or protein references for a protein or dna sequence
- *
- * @param fromDna
- * if true, select references from DNA (i.e. Protein databases), else
- * DNA database references
- * @param refs
- * a set of references to select from
- * @return
+ * sequences found by cross-ref searches to fromSeqs
*/
- public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs)
- {
- return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS
- : DBRefSource.DNACODINGDBS);
- // could attempt to find other cross
- // refs here - ie PDB xrefs
- // (not dna, not protein seq)
- }
+ List<SequenceI> rseqs;
/**
- * @param dna
- * true if seqs are DNA seqs
+ * Constructor
+ *
* @param seqs
- * @return a list of sequence database cross reference source types
+ * the sequences for which we are seeking cross-references
+ * @param ds
+ * the containing alignment dataset (may be searched to resolve
+ * cross-references)
*/
- public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs)
+ public CrossRef(SequenceI[] seqs, AlignmentI ds)
{
- return findSequenceXrefTypes(dna, seqs, null);
+ fromSeqs = seqs;
+ dataset = ds.getDataset() == null ? ds : ds.getDataset();
}
/**
- * Indirect references are references from other sequences from the dataset to
- * any of the direct DBRefEntrys on the given sequences.
+ * Returns a list of distinct database sources for which sequences have either
+ * <ul>
+ * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+ * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+ * reference from another sequence in the dataset which has a cross-reference
+ * to a direct DBRefEntry on the given sequence</li>
+ * </ul>
*
* @param dna
- * true if seqs are DNA seqs
- * @param seqs
- * @return a list of sequence database cross reference source types
+ * - when true, cross-references *from* dna returned. When false,
+ * cross-references *from* protein are returned
+ * @return
*/
- public static String[] findSequenceXrefTypes(boolean dna,
- SequenceI[] seqs, AlignmentI dataset)
+ public List<String> findXrefSourcesForSequences(boolean dna)
{
- String[] dbrefs = null;
- List<String> refs = new ArrayList<String>();
- for (SequenceI seq : seqs)
+ List<String> sources = new ArrayList<String>();
+ for (SequenceI seq : fromSeqs)
{
if (seq != null)
{
- SequenceI dss = seq;
- while (dss.getDatasetSequence() != null)
- {
- dss = dss.getDatasetSequence();
- }
- DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs());
- if (rfs != null)
- {
- for (DBRefEntry ref : rfs)
- {
- if (!refs.contains(ref.getSource()))
- {
- refs.add(ref.getSource());
- }
- }
- }
- if (dataset != null)
- {
- // search for references to this sequence's direct references.
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
- List<SequenceI> rseqs = new ArrayList<SequenceI>();
- CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
- null); // don't need to specify codon frame for mapping here
- for (SequenceI rs : rseqs)
- {
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs());
- if (xrs != null)
- {
- for (DBRefEntry ref : xrs)
- {
- if (!refs.contains(ref.getSource()))
- {
- refs.add(ref.getSource());
- }
- }
- }
- // looks like copy and paste - change rfs to xrs?
- // for (int r = 0; rfs != null && r < rfs.length; r++)
- // {
- // if (!refs.contains(rfs[r].getSource()))
- // {
- // refs.add(rfs[r].getSource());
- // }
- // }
- }
- }
+ findXrefSourcesForSequence(seq, dna, sources);
}
}
- if (refs.size() > 0)
- {
- dbrefs = new String[refs.size()];
- refs.toArray(dbrefs);
- }
- return dbrefs;
+ return sources;
}
- public static boolean hasCdnaMap(SequenceI[] seqs)
+ /**
+ * Returns a list of distinct database sources for which a sequence has either
+ * <ul>
+ * <li>a (dna-to-protein or protein-to-dna) cross-reference</li>
+ * <li>an indirect cross-reference - a (dna-to-protein or protein-to-dna)
+ * reference from another sequence in the dataset which has a cross-reference
+ * to a direct DBRefEntry on the given sequence</li>
+ * </ul>
+ *
+ * @param seq
+ * the sequence whose dbrefs we are searching against
+ * @param fromDna
+ * when true, context is DNA - so sources identifying protein
+ * products will be returned.
+ * @param sources
+ * a list of sources to add matches to
+ */
+ void findXrefSourcesForSequence(SequenceI seq, boolean fromDna,
+ List<String> sources)
{
- // TODO unused - remove?
- String[] reftypes = findSequenceXrefTypes(false, seqs);
- for (int s = 0; s < reftypes.length; s++)
+ /*
+ * first find seq's xrefs (dna-to-peptide or peptide-to-dna)
+ */
+ DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs());
+ addXrefsToSources(rfs, sources);
+ if (dataset != null)
{
- if (reftypes.equals(DBRefSource.EMBLCDS))
+ /*
+ * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
+ */
+ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs());
+ List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
+
+ /*
+ * find sequences in the alignment which xref one of these DBRefs
+ * i.e. is xref-ed to a common sequence identifier
+ */
+ searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null);
+
+ /*
+ * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources
+ */
+ for (SequenceI rs : foundSeqs)
{
- return true;
- // no map
+ DBRefEntry[] xrs = DBRefUtils
+ .selectDbRefs(!fromDna, rs.getDBRefs());
+ addXrefsToSources(xrs, sources);
}
}
- return false;
}
- public static SequenceI[] getCdnaMap(SequenceI[] seqs)
+ /**
+ * Helper method that adds the source identifiers of some cross-references to
+ * a (non-redundant) list of database sources
+ *
+ * @param xrefs
+ * @param sources
+ */
+ void addXrefsToSources(DBRefEntry[] xrefs, List<String> sources)
{
- // TODO unused - remove?
- Vector cseqs = new Vector();
- for (int s = 0; s < seqs.length; s++)
+ if (xrefs != null)
{
- DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRefs());
- for (int c = 0; c < cdna.length; c++)
+ for (DBRefEntry ref : xrefs)
{
- if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
+ /*
+ * avoid duplication e.g. ENSEMBL and Ensembl
+ */
+ String source = DBRefUtils.getCanonicalName(ref.getSource());
+ if (!sources.contains(source))
{
- System.err
- .println("TODO: unimplemented sequence retrieval for coding region sequence.");
- // TODO: retrieve CDS dataset sequences
- // need global dataset sequence retriever/resolver to reuse refs
- // and construct Mapping entry.
- // insert gaps in CDS according to peptide gaps.
- // add gapped sequence to cseqs
+ sources.add(source);
}
}
}
- if (cseqs.size() > 0)
- {
- SequenceI[] rsqs = new SequenceI[cseqs.size()];
- cseqs.copyInto(rsqs);
- return rsqs;
- }
- return null;
-
}
/**
+ * Attempts to find cross-references from the sequences provided in the
+ * constructor to the given source database. Cross-references may be found
+ * <ul>
+ * <li>in dbrefs on the sequence which hold a mapping to a sequence
+ * <ul>
+ * <li>provided with a fetched sequence (e.g. ENA translation), or</li>
+ * <li>populated previously after getting cross-references</li>
+ * </ul>
+ * <li>as other sequences in the alignment which share a dbref identifier with
+ * the sequence</li>
+ * <li>by fetching from the remote database</li>
+ * </ul>
+ * The cross-referenced sequences, and mappings to them, are added to the
+ * alignment dataset.
*
- * @param seqs
- * sequences whose xrefs are being retrieved
- * @param dna
- * true if sequences are nucleotide
* @param source
- * @param al
- * alignment to search for cross-referenced sequences (and possibly
- * add to)
- * @return products (as dataset sequences)
+ * @return cross-referenced sequences (as dataset sequences)
*/
- public static Alignment findXrefSequences(SequenceI[] seqs,
- final boolean dna, final String source, AlignmentI al)
+ public Alignment findXrefSequences(String source, boolean fromDna)
{
- AlignmentI dataset = al.getDataset() == null ? al : al.getDataset();
- List<SequenceI> rseqs = new ArrayList<SequenceI>();
+
+ rseqs = new ArrayList<SequenceI>();
AlignedCodonFrame cf = new AlignedCodonFrame();
- for (SequenceI seq : seqs)
+ matcher = new SequenceIdMatcher(
+ dataset.getSequences());
+
+ for (SequenceI seq : fromSeqs)
{
SequenceI dss = seq;
while (dss.getDatasetSequence() != null)
dss = dss.getDatasetSequence();
}
boolean found = false;
- DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRefs());
+ DBRefEntry[] xrfs = DBRefUtils
+ .selectDbRefs(!fromDna, dss.getDBRefs());
if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
- System.out.println("Attempting to find ds Xrefs refs.");
- // FIXME should be dss not seq here?
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
- // less ambiguous would be a 'find primary dbRefEntry' method.
- // filter for desired source xref here
- found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
- rseqs, cf);
+ /*
+ * found no suitable dbrefs on sequence - look for sequences in the
+ * alignment which share a dbref with this one
+ */
+ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna,
+ seq.getDBRefs());
+
+ /*
+ * find sequences (except this one!), of complementary type,
+ * which have a dbref to an accession id for this sequence,
+ * and add them to the results
+ */
+ found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf);
}
- for (int r = 0; xrfs != null && r < xrfs.length; r++)
+ if (xrfs == null && !found)
{
- DBRefEntry xref = xrfs[r];
- if (source != null && !source.equals(xref.getSource()))
- {
- continue;
- }
+ /*
+ * no dbref to source on this sequence or matched
+ * complementary sequence in the dataset
+ */
+ continue;
+ }
+ List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs,
+ source);
+ Iterator<DBRefEntry> refIterator = sourceRefs.iterator();
+ while (refIterator.hasNext())
+ {
+ DBRefEntry xref = refIterator.next();
+ found = false;
if (xref.hasMap())
{
- if (xref.getMap().getTo() != null)
+ SequenceI mappedTo = xref.getMap().getTo();
+ if (mappedTo != null)
{
- SequenceI rsq = new Sequence(xref.getMap().getTo());
+ /*
+ * dbref contains the sequence it maps to; add it to the
+ * results unless we have done so already (could happen if
+ * fetching xrefs for sequences which have xrefs in common)
+ * for example: UNIPROT {P0CE19, P0CE20} -> EMBL {J03321, X06707}
+ */
+ found = true;
+ /*
+ * problem: matcher.findIdMatch() is lenient - returns a sequence
+ * with a dbref to the search arg e.g. ENST for ENSP - wrong
+ * but findInDataset() matches ENSP when looking for Uniprot...
+ */
+ SequenceI matchInDataset = findInDataset(xref);
+ /*matcher.findIdMatch(mappedTo);*/
+ if (matchInDataset != null)
+ {
+ if (!rseqs.contains(matchInDataset))
+ {
+ rseqs.add(matchInDataset);
+ }
+ refIterator.remove();
+ continue;
+ }
+ SequenceI rsq = new Sequence(mappedTo);
rseqs.add(rsq);
- if (xref.getMap().getMap().getFromRatio() != xref
- .getMap().getMap().getToRatio())
+ if (xref.getMap().getMap().getFromRatio() != xref.getMap()
+ .getMap().getToRatio())
{
// get sense of map correct for adding to product alignment.
- if (dna)
+ if (fromDna)
{
// map is from dna seq to a protein product
- cf.addMap(dss, rsq, xref.getMap().getMap());
+ cf.addMap(dss, rsq, xref.getMap().getMap(), xref.getMap()
+ .getMappedFromId());
}
else
{
// map should be from protein seq to its coding dna
- cf.addMap(rsq, dss, xref.getMap().getMap().getInverse());
+ cf.addMap(rsq, dss, xref.getMap().getMap().getInverse(),
+ xref.getMap().getMappedFromId());
}
}
- found = true;
}
}
+
if (!found)
{
- // do a bit more work - search for sequences with references matching
- // xrefs on this sequence.
- if (dataset != null)
+ SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|"
+ + xref.getAccessionId());
+ if (matchedSeq != null)
{
- found |= searchDataset(dss, xref, dataset, rseqs, cf, false,
- !dna);
- if (found)
+ if (constructMapping(seq, matchedSeq, xref, cf, fromDna))
{
- xrfs[r] = null; // we've recovered seqs for this one.
+ found = true;
}
}
}
+
+ if (!found)
+ {
+ // do a bit more work - search for sequences with references matching
+ // xrefs on this sequence.
+ found = searchDataset(fromDna, dss, xref, rseqs, cf, false);
+ }
+ if (found)
+ {
+ refIterator.remove();
+ }
+ }
+
+ /*
+ * fetch from source database any dbrefs we haven't resolved up to here
+ */
+ if (!sourceRefs.isEmpty())
+ {
+ retrieveCrossRef(sourceRefs, seq, xrfs, fromDna, cf);
+ }
+ }
+
+ Alignment ral = null;
+ if (rseqs.size() > 0)
+ {
+ ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
+ if (!cf.isEmpty())
+ {
+ dataset.addCodonFrame(cf);
}
- if (!found)
+ }
+ return ral;
+ }
+
+ private void retrieveCrossRef(List<DBRefEntry> sourceRefs, SequenceI seq,
+ DBRefEntry[] xrfs, boolean fromDna, AlignedCodonFrame cf)
+ {
+ ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher();
+ SequenceI[] retrieved = null;
+ SequenceI dss = seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence();
+ try
+ {
+ retrieved = sftch.getSequences(sourceRefs, !fromDna);
+ } catch (Exception e)
+ {
+ System.err
+ .println("Problem whilst retrieving cross references for Sequence : "
+ + seq.getName());
+ e.printStackTrace();
+ }
+
+ if (retrieved != null)
+ {
+ updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna);
+ for (SequenceI retrievedSequence : retrieved)
{
- if (xrfs != null && xrfs.length > 0)
+ // dataset gets contaminated with non-ds sequences. why ??!
+ // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
+ SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence
+ : retrievedSequence.getDatasetSequence();
+ DBRefEntry[] dbr = retrievedSequence.getDBRefs();
+ if (dbr != null)
{
- // Try and get the sequence reference...
- /*
- * Ideal world - we ask for a sequence fetcher implementation here if
- * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) (
- */
- ASequenceFetcher sftch = new SequenceFetcher();
- SequenceI[] retrieved = null;
- int l = xrfs.length;
- for (int r = 0; r < xrfs.length; r++)
- {
- // filter out any irrelevant or irretrievable references
- if (xrfs[r] == null
- || ((source != null && !source.equals(xrfs[r]
- .getSource())) || !sftch.isFetchable(xrfs[r]
- .getSource())))
- {
- l--;
- xrfs[r] = null;
- }
- }
- if (l > 0)
+ for (DBRefEntry dbref : dbr)
{
- // System.out
- // .println("Attempting to retrieve cross referenced sequences.");
- DBRefEntry[] t = new DBRefEntry[l];
- l = 0;
- for (int r = 0; r < xrfs.length; r++)
+ // find any entry where we should put in the sequence being
+ // cross-referenced into the map
+ Mapping map = dbref.getMap();
+ if (map != null)
{
- if (xrfs[r] != null)
+ if (map.getTo() != null && map.getMap() != null)
{
- t[l++] = xrfs[r];
- }
- }
- xrfs = t;
- try
- {
- retrieved = sftch.getSequences(xrfs, !dna);
- // problem here is we don't know which of xrfs resulted in which
- // retrieved element
- } catch (Exception e)
- {
- System.err
- .println("Problem whilst retrieving cross references for Sequence : "
- + seq.getName());
- e.printStackTrace();
- }
-
- if (retrieved != null)
- {
- updateDbrefMappings(dna, seq, xrfs, retrieved, cf);
-
- SequenceIdMatcher matcher = new SequenceIdMatcher(
- dataset.getSequences());
- List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
- CrossRef me = new CrossRef();
- for (int rs = 0; rs < retrieved.length; rs++)
- {
- // TODO: examine each sequence for 'redundancy'
- DBRefEntry[] dbr = retrieved[rs].getDBRefs();
- if (dbr != null && dbr.length > 0)
+ // TODO findInDataset requires exact sequence match but
+ // 'congruent' test is only for the mapped part
+ // maybe not a problem in practice since only ENA provide a
+ // mapping and it is to the full protein translation of CDS
+ SequenceI matched = findInDataset(dbref);
+ // matcher.findIdMatch(map.getTo());
+ if (matched != null)
{
- for (int di = 0; di < dbr.length; di++)
+ /*
+ * already got an xref to this sequence; update this
+ * map to point to the same sequence, and add
+ * any new dbrefs to it
+ */
+ DBRefEntry[] toRefs = map.getTo().getDBRefs();
+ if (toRefs != null)
{
- // find any entry where we should put in the sequence being
- // cross-referenced into the map
- Mapping map = dbr[di].getMap();
- if (map != null)
+ for (DBRefEntry ref : toRefs)
{
- if (map.getTo() != null && map.getMap() != null)
+ matched.addDBRef(ref); // add or update mapping
+ }
+ }
+ map.setTo(matched);
+ }
+ else
+ {
+ matcher.add(map.getTo());
+ }
+ try
+ {
+ // compare ms with dss and replace with dss in mapping
+ // if map is congruent
+ SequenceI ms = map.getTo();
+ int sf = map.getMap().getToLowest();
+ int st = map.getMap().getToHighest();
+ SequenceI mappedrg = ms.getSubSequence(sf, st);
+ // SequenceI loc = dss.getSubSequence(sf, st);
+ if (mappedrg.getLength() > 0
+ && ms.getSequenceAsString().equals(
+ dss.getSequenceAsString()))
+ // && mappedrg.getSequenceAsString().equals(
+ // loc.getSequenceAsString()))
+ {
+ String msg = "Mapping updated from " + ms.getName()
+ + " to retrieved crossreference "
+ + dss.getName();
+ System.out.println(msg);
+ map.setTo(dss);
+
+ /*
+ * give the reverse reference the inverse mapping
+ * (if it doesn't have one already)
+ */
+ setReverseMapping(dss, dbref, cf);
+
+ /*
+ * copy sequence features as well, avoiding
+ * duplication (e.g. same variation from two
+ * transcripts)
+ */
+ SequenceFeature[] sfs = ms.getSequenceFeatures();
+ if (sfs != null)
+ {
+ for (SequenceFeature feat : sfs)
{
- SequenceI matched = matcher
- .findIdMatch(map.getTo());
- if (matched != null)
- {
- /*
- * already got an xref to this sequence; update this
- * map to point to the same sequence, and add
- * any new dbrefs to it
- */
- for (DBRefEntry ref : map.getTo().getDBRefs())
- {
- matched.addDBRef(ref); // add or update mapping
- }
- map.setTo(matched);
- }
- else
+ /*
+ * make a flyweight feature object which ignores Parent
+ * attribute in equality test; this avoids creating many
+ * otherwise duplicate exon features on genomic sequence
+ */
+ SequenceFeature newFeature = new SequenceFeature(
+ feat)
{
- matcher.add(map.getTo());
- }
- try
- {
- // compare ms with dss and replace with dss in mapping
- // if map is congruent
- SequenceI ms = map.getTo();
- int sf = map.getMap().getToLowest();
- int st = map.getMap().getToHighest();
- SequenceI mappedrg = ms.getSubSequence(sf, st);
- // SequenceI loc = dss.getSubSequence(sf, st);
- if (mappedrg.getLength() > 0
- && ms.getSequenceAsString().equals(
- dss.getSequenceAsString()))
- // && mappedrg.getSequenceAsString().equals(
- // loc.getSequenceAsString()))
- {
- String msg = "Mapping updated from "
- + ms.getName()
- + " to retrieved crossreference "
- + dss.getName();
- System.out.println(msg);
- // method to update all refs of existing To on
- // retrieved sequence with dss and merge any props
- // on To onto dss.
- map.setTo(dss);
- /*
- * copy sequence features as well, avoiding
- * duplication (e.g. same variation from 2
- * transcripts)
- */
- SequenceFeature[] sfs = ms
- .getSequenceFeatures();
- if (sfs != null)
- {
- for (SequenceFeature feat : sfs)
- {
- /*
- * we override SequenceFeature.equals here (but
- * not elsewhere) to ignore Parent attribute
- * TODO not quite working yet!
- */
- if (!copiedFeatures
- .contains(me.new MySequenceFeature(
- feat)))
- {
- dss.addSequenceFeature(feat);
- copiedFeatures.add(feat);
- }
- }
- }
- cf.addMap(retrieved[rs].getDatasetSequence(),
- dss, map.getMap());
- }
- else
+ @Override
+ public boolean equals(Object o)
{
- cf.addMap(retrieved[rs].getDatasetSequence(),
- map.getTo(), map.getMap());
+ return super.equals(o, true);
}
- } catch (Exception e)
- {
- System.err
- .println("Exception when consolidating Mapped sequence set...");
- e.printStackTrace(System.err);
- }
+ };
+ dss.addSequenceFeature(newFeature);
}
}
}
+ cf.addMap(retrievedDss, map.getTo(), map.getMap());
+ } catch (Exception e)
+ {
+ System.err
+ .println("Exception when consolidating Mapped sequence set...");
+ e.printStackTrace(System.err);
}
- retrieved[rs].updatePDBIds();
- rseqs.add(retrieved[rs]);
}
}
}
}
+ retrievedSequence.updatePDBIds();
+ rseqs.add(retrievedDss);
+ dataset.addSequence(retrievedDss);
+ matcher.add(retrievedDss);
+ }
+ }
+ }
+ /**
+ * Sets the inverse sequence mapping in the corresponding dbref of the mapped
+ * to sequence (if any). This is used after fetching a cross-referenced
+ * sequence, if the fetched sequence has a mapping to the original sequence,
+ * to set the mapping in the original sequence's dbref.
+ *
+ * @param mapFrom
+ * the sequence mapped from
+ * @param dbref
+ * @param mappings
+ */
+ void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref,
+ AlignedCodonFrame mappings)
+ {
+ SequenceI mapTo = dbref.getMap().getTo();
+ if (mapTo == null)
+ {
+ return;
+ }
+ DBRefEntry[] dbrefs = mapTo.getDBRefs();
+ if (dbrefs == null)
+ {
+ return;
+ }
+ for (DBRefEntry toRef : dbrefs)
+ {
+ if (toRef.hasMap() && mapFrom == toRef.getMap().getTo())
+ {
+ /*
+ * found the reverse dbref; update its mapping if null
+ */
+ if (toRef.getMap().getMap() == null)
+ {
+ MapList inverse = dbref.getMap().getMap().getInverse();
+ toRef.getMap().setMap(inverse);
+ mappings.addMap(mapTo, mapFrom, inverse);
+ }
}
}
+ }
- Alignment ral = null;
- if (rseqs.size() > 0)
+ /**
+ * Returns the first identical sequence in the dataset if any, else null
+ *
+ * @param xref
+ * @return
+ */
+ SequenceI findInDataset(DBRefEntry xref)
+ {
+ if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null)
{
- ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
- if (cf != null && !cf.isEmpty())
+ return null;
+ }
+ SequenceI mapsTo = xref.getMap().getTo();
+ String name = xref.getAccessionId();
+ String name2 = xref.getSource() + "|" + name;
+ SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo
+ .getDatasetSequence();
+ for (SequenceI seq : dataset.getSequences())
+ {
+ /*
+ * clumsy alternative to using SequenceIdMatcher which currently
+ * returns sequences with a dbref to the matched accession id
+ * which we don't want
+ */
+ if (name.equals(seq.getName()) || seq.getName().startsWith(name2))
{
- ral.addCodonFrame(cf);
+ if (sameSequence(seq, dss))
+ {
+ return seq;
+ }
}
}
- return ral;
+ return null;
+ }
+
+  /**
+   * Answers true if seq1 and seq2 contain exactly the same characters (ignoring
+   * case), else false. This method compares the lengths, then each character in
+   * turn, in order to 'fail fast'. For case-sensitive comparison, it would be
+   * possible to use Arrays.equals(seq1.getSequence(), seq2.getSequence()).
+   * <p>
+   * Characters are compared after case folding rather than by testing for a
+   * difference of 32 between their codes, since that test also equates
+   * unrelated pairs such as '-' and 'M', or '.' and 'N'.
+   *
+   * @param seq1
+   *          first sequence to compare (may be null)
+   * @param seq2
+   *          second sequence to compare (may be null)
+   * @return true if both arguments are the same object (or both null), or if
+   *         both are non-null and hold the same characters ignoring case
+   */
+  // TODO move to Sequence / SequenceI
+  static boolean sameSequence(SequenceI seq1, SequenceI seq2)
+  {
+    if (seq1 == seq2)
+    {
+      return true;
+    }
+    if (seq1 == null || seq2 == null)
+    {
+      return false;
+    }
+    char[] c1 = seq1.getSequence();
+    char[] c2 = seq2.getSequence();
+    if (c1.length != c2.length)
+    {
+      return false;
+    }
+    for (int i = 0; i < c1.length; i++)
+    {
+      /*
+       * same char, or the same letter in a different case
+       */
+      if (c1[i] != c2[i]
+              && Character.toLowerCase(c1[i]) != Character
+                      .toLowerCase(c2[i]))
+      {
+        return false;
+      }
+    }
+    return true;
   }
/**
* retrieved sequence if found, and adds any new mappings to the
* AlignedCodonFrame
*
- * @param dna
* @param mapFrom
* @param xrefs
* @param retrieved
* @param acf
*/
- static void updateDbrefMappings(boolean dna, SequenceI mapFrom,
- DBRefEntry[] xrefs, SequenceI[] retrieved, AlignedCodonFrame acf)
+ void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs,
+ SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna)
{
- SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved);
+ SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved);
for (DBRefEntry xref : xrefs)
{
if (!xref.hasMap())
{
String targetSeqName = xref.getSource() + "|"
+ xref.getAccessionId();
- SequenceI[] matches = matcher.findAllIdMatches(targetSeqName);
+ SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName);
if (matches == null)
{
return;
}
for (SequenceI seq : matches)
{
- MapList mapping = null;
- if (dna)
- {
- mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom);
- }
- else
- {
- mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq);
- if (mapping != null)
- {
- mapping = mapping.getInverse();
- }
- }
- if (mapping != null)
- {
- xref.setMap(new Mapping(seq, mapping));
- if (dna)
- {
- AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping);
- }
- if (dna)
- {
- acf.addMap(mapFrom, seq, mapping);
- }
- else
- {
- acf.addMap(seq, mapFrom, mapping.getInverse());
- }
- continue;
- }
+ constructMapping(mapFrom, seq, xref, acf, fromDna);
+ }
+ }
+ }
+ }
+
+ /**
+ * Tries to make a mapping between sequences. If successful, adds the mapping
+ * to the dbref and the mappings collection and answers true, otherwise
+ * answers false. The following methods of making a mapping are tried in
+ * turn:
+ * <ul>
+ * <li>if 'mapTo' holds a mapping to 'mapFrom', take the inverse; this is, for
+ * example, the case after fetching EMBL cross-references for a Uniprot
+ * sequence</li>
+ * <li>else check if the dna translates exactly to the protein (give or take
+ * start and stop codons)</li>
+ * <li>else try to map based on CDS features on the dna sequence</li>
+ * </ul>
+ *
+ * @param mapFrom
+ * @param mapTo
+ * @param xref
+ * @param mappings
+ * @return
+ */
+ boolean constructMapping(SequenceI mapFrom, SequenceI mapTo,
+ DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna)
+ {
+ MapList mapping = null;
+
+ /*
+ * look for a reverse mapping, if found make its inverse
+ */
+ if (mapTo.getDBRefs() != null)
+ {
+ for (DBRefEntry dbref : mapTo.getDBRefs())
+ {
+ String name = dbref.getSource() + "|" + dbref.getAccessionId();
+ if (dbref.hasMap() && mapFrom.getName().startsWith(name))
+ {
+ /*
+ * looks like we've found a map from 'mapTo' to 'mapFrom'
+ * - invert it to make the mapping the other way
+ */
+ MapList reverse = dbref.getMap().getMap().getInverse();
+ xref.setMap(new Mapping(mapTo, reverse));
+ mappings.addMap(mapFrom, mapTo, reverse);
+ return true;
}
}
}
+
+ if (fromDna)
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom);
+ }
+ else
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo);
+ if (mapping != null)
+ {
+ mapping = mapping.getInverse();
+ }
+ }
+ if (mapping == null)
+ {
+ return false;
+ }
+ xref.setMap(new Mapping(mapTo, mapping));
+
+ /*
+ * and add a reverse DbRef with the inverse mapping
+ */
+ if (mapFrom.getDatasetSequence() != null
+ && mapFrom.getDatasetSequence().getSourceDBRef() != null)
+ {
+ DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
+ .getSourceDBRef());
+ dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
+ .getInverse()));
+ mapTo.addDBRef(dbref);
+ }
+
+ if (fromDna)
+ {
+ AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping);
+ mappings.addMap(mapFrom, mapTo, mapping);
+ }
+ else
+ {
+ mappings.addMap(mapTo, mapFrom, mapping.getInverse());
+ }
+
+ return true;
}
/**
* dataset (that is not equal to sequenceI) Identifies matching DBRefEntry
* based on source and accession string only - Map and Version are nulled.
*
+ * @param fromDna
+ * - true if context was searching from Dna sequences, false if
+ * context was searching from Protein sequences
* @param sequenceI
* @param lrfs
- * @param dataset
- * @param rseqs
+ * @param foundSeqs
* @return true if matches were found.
*/
- private static boolean searchDatasetXrefs(SequenceI sequenceI,
- boolean dna, DBRefEntry[] lrfs, AlignmentI dataset,
- List<SequenceI> rseqs, AlignedCodonFrame cf)
+ private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,
+ DBRefEntry[] lrfs, List<SequenceI> foundSeqs, AlignedCodonFrame cf)
{
boolean found = false;
if (lrfs == null)
// add in wildcards
xref.setVersion(null);
xref.setMap(null);
- found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna);
+ found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, false);
}
return found;
}
/**
- * search a given sequence dataset for references matching cross-references to
- * the given sequence
- *
- * @param sequenceI
- * @param xrf
- * @param dataset
- * @param rseqs
- * set of unique sequences
- * @param cf
- * @return true if one or more unique sequences were found and added
- */
- public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
- AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf)
- {
- return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);
- }
-
- /**
- * TODO: generalise to different protein classifications Search dataset for
- * DBRefEntrys matching the given one (xrf) and add the associated sequence to
- * rseq.
+ * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the
+ * associated sequence to rseqs
*
- * @param sequenceI
+ * @param fromDna
+ * true if context was searching for refs *from* dna sequence, false
+ * if context was searching for refs *from* protein sequence
+ * @param fromSeq
+ * a sequence to ignore (start point of search)
* @param xrf
- * @param dataset
- * @param rseqs
+ * a cross-reference to try to match
+ * @param foundSeqs
+ * result list to add to
+ * @param mappings
+ * a set of sequence mappings to add to
* @param direct
- * - search all references or only subset
- * @param dna
- * search dna or protein xrefs (if direct=false)
+ * - indicates the type of relationship between returned sequences,
+ * xrf, and sequenceI that is required.
+ * <ul>
+ * <li>direct implies xrf is a primary reference for sequenceI AND
+ * the sequences to be located (eg a uniprot ID for a protein
+ * sequence, and a uniprot ref on a transcript sequence).</li>
+ * <li>indirect means xrf is a cross reference with respect to
+ * sequenceI or all the returned sequences (eg a genomic reference
+ * associated with a locus and one or more transcripts)</li>
+ * </ul>
* @return true if relationship found and sequence added.
*/
- public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
- AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf,
- boolean direct, boolean dna)
+ boolean searchDataset(boolean fromDna, SequenceI fromSeq,
+ DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
+ boolean direct)
{
boolean found = false;
- SequenceI[] typer = new SequenceI[1];
if (dataset == null)
{
return false;
if (nxt.getDatasetSequence() != null)
{
System.err
- .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
+ .println("Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! ("
+ + nxt.getDisplayId(true)
+ + " has ds reference "
+ + nxt.getDatasetSequence().getDisplayId(true)
+ + ")");
+ }
+ if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence())
+ {
+ continue;
}
- if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())
+ /*
+ * only look at same molecule type if 'direct', or
+ * complementary type if !direct
+ */
{
- // check if this is the correct sequence type
+ boolean isDna = !nxt.isProtein();
+ if (direct ? (isDna != fromDna) : (isDna == fromDna))
{
- typer[0] = nxt;
- boolean isDna = jalview.util.Comparison.isNucleotide(typer);
- if ((direct && isDna == dna) || (!direct && isDna != dna))
- {
- // skip this sequence because it is same molecule type
- continue;
- }
+ // skip this sequence because it is wrong molecule type
+ continue;
}
+ }
- // look for direct or indirect references in common
- DBRefEntry[] poss = nxt.getDBRefs(), cands = null;
- if (direct)
- {
- cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
- }
- else
- {
- poss = CrossRef.findXDbRefs(dna, poss); //
- cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
- }
- if (cands != null)
+ // look for direct or indirect references in common
+ DBRefEntry[] poss = nxt.getDBRefs();
+ List<DBRefEntry> cands = null;
+
+ // todo: indirect specifies we select either direct references to nxt
+ // that match xrf which is indirect to sequenceI, or indirect
+ // references to nxt that match xrf which is direct to sequenceI
+ cands = DBRefUtils.searchRefs(poss, xrf);
+ // else
+ // {
+ // poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss);
+ // cands = DBRefUtils.searchRefs(poss, xrf);
+ // }
+ if (!cands.isEmpty())
+ {
+ if (!foundSeqs.contains(nxt))
{
- if (!rseqs.contains(nxt))
+ found = true;
+ foundSeqs.add(nxt);
+ if (mappings != null && !direct)
{
- rseqs.add(nxt);
- boolean foundmap = cf != null;
- // don't search if we aren't given a codon map object
- for (int r = 0; foundmap && r < cands.length; r++)
+ /*
+ * if the matched sequence has mapped dbrefs to
+ * protein product / cdna, add equivalent mappings to
+ * our source sequence
+ */
+ for (DBRefEntry candidate : cands)
{
- if (cands[r].hasMap())
+ Mapping mapping = candidate.getMap();
+ if (mapping != null)
{
- if (cands[r].getMap().getTo() != null
- && cands[r].getMap().getMap().getFromRatio() != cands[r]
- .getMap().getMap().getToRatio())
+ MapList map = mapping.getMap();
+ if (mapping.getTo() != null
+ && map.getFromRatio() != map.getToRatio())
{
- foundmap = true;
- // get sense of map correct for adding to product
- // alignment.
- if (dna)
+ /*
+ * add a mapping, as from dna to peptide sequence
+ */
+ if (map.getFromRatio() == 3)
{
- // map is from dna seq to a protein product
- cf.addMap(sequenceI, nxt, cands[r].getMap()
- .getMap());
+ mappings.addMap(nxt, fromSeq, map);
}
else
{
- // map should be from protein seq to its coding dna
- cf.addMap(nxt, sequenceI, cands[r].getMap()
- .getMap().getInverse());
+ mappings.addMap(nxt, fromSeq, map.getInverse());
}
}
}
}
- // TODO: add mapping between sequences if necessary
- found = true;
}
}
-
}
}
}
}
return found;
}
-
- /**
- * precalculate different products that can be found for seqs in dataset and
- * return them.
- *
- * @param dna
- * @param seqs
- * @param dataset
- * @param fake
- * - don't actually build lists - just get types
- * @return public static Object[] buildXProductsList(boolean dna, SequenceI[]
- * seqs, AlignmentI dataset, boolean fake) { String types[] =
- * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,
- * dataset); if (types != null) { System.out.println("Xref Types for:
- * "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) {
- * System.out.println("Type: " + types[t]); SequenceI[] prod =
- * jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);
- * System.out.println("Found " + ((prod == null) ? "no" : "" +
- * prod.length) + " products"); if (prod!=null) { for (int p=0;
- * p<prod.length; p++) { System.out.println("Prod "+p+":
- * "+prod[p].getDisplayId(true)); } } } } else {
- * System.out.println("Trying getProducts for
- * "+al.getSequenceAt(0).getDisplayId(true));
- * System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot"));
- * // have a bash at finding the products amongst all the retrieved
- * sequences. SequenceI[] prod =
- * jalview.analysis.CrossRef.findXrefSequences(al
- * .getSequencesArray(), dna, null, ds); System.out.println("Found " +
- * ((prod == null) ? "no" : "" + prod.length) + " products"); if
- * (prod!=null) { // select non-equivalent sequences from dataset list
- * for (int p=0; p<prod.length; p++) { System.out.println("Prod "+p+":
- * "+prod[p].getDisplayId(true)); } } } }
- */
}
final private int dnaWidth;
- final private Alignment dataset;
+ final private AlignmentI dataset;
/*
* Working variables for the translation.
{
if (s != null)
{
- id = new String(s.toLowerCase());
+ id = s.toLowerCase();
}
else
{
.indexOf(s.charAt(id.length())) > -1)) : false;
}
}
+
+ /**
+ * toString method returns the wrapped sequence id. For debugging purposes
+ * only, behaviour not guaranteed not to change.
+ */
+ @Override
+ public String toString()
+ {
+ return id;
+ }
}
}
import jalview.util.MapList;
import jalview.util.MappingUtils;
+import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
/*
* Data bean to hold mappings from one sequence to another
*/
- private class SequenceToSequenceMapping
+ public class SequenceToSequenceMapping
{
private SequenceI fromSeq;
return String.format("From %s %s", fromSeq.getName(),
mapping.toString());
}
+
+    /**
+     * Returns a hashCode consistent with {@link #equals(Object)}: it is
+     * derived from the hashcodes of the mapping and of fromSeq's dataset
+     * sequence (or of fromSeq itself if it has no dataset sequence), so that
+     * two objects whose fromSeq share a dataset sequence hash alike. If the
+     * mapping is null the result is 0, since equals treats any two objects
+     * with a null mapping as equal.
+     *
+     * @see Mapping#hashCode()
+     */
+    @Override
+    public int hashCode()
+    {
+      if (mapping == null)
+      {
+        return 0;
+      }
+      SequenceI hashedSeq = fromSeq;
+      if (hashedSeq != null && hashedSeq.getDatasetSequence() != null)
+      {
+        hashedSeq = hashedSeq.getDatasetSequence();
+      }
+      return (hashedSeq == null ? 0 : hashedSeq.hashCode() * 31)
+              + mapping.hashCode();
+    }
+
+ /**
+ * Answers true if the objects hold the same mapping between the same two
+ * sequences
+ *
+ * @see Mapping#equals
+ */
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (!(obj instanceof SequenceToSequenceMapping))
+ {
+ return false;
+ }
+ SequenceToSequenceMapping that = (SequenceToSequenceMapping) obj;
+ if (this.mapping == null)
+ {
+ return that.mapping == null;
+ }
+ // TODO: can simplify by asserting fromSeq is a dataset sequence
+ return (this.fromSeq == that.fromSeq || (this.fromSeq != null
+ && that.fromSeq != null
+ && this.fromSeq.getDatasetSequence() != null && this.fromSeq
+ .getDatasetSequence() == that.fromSeq
+ .getDatasetSequence())) && this.mapping.equals(that.mapping);
+ }
+
+ /**
+ * Returns the sequence this mapping is from (the fromSeq field)
+ *
+ * @return the 'from' sequence held by this object
+ */
+ public SequenceI getFromSeq()
+ {
+ return fromSeq;
+ }
+
+ /**
+ * Returns the mapping held by this object (the mapping field itself, not a
+ * copy)
+ *
+ * @return the mapping from the 'from' sequence
+ */
+ public Mapping getMapping()
+ {
+ return mapping;
+ }
}
private List<SequenceToSequenceMapping> mappings;
*/
public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
{
+ addMap(dnaseq, aaseq, map, null);
+ }
+
+ /**
+ * Adds a mapping between the dataset sequences for the associated dna and
+ * protein sequence objects
+ *
+ * @param dnaseq
+ * @param aaseq
+ * @param map
+ * @param mapFromId
+ */
+ public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map,
+ String mapFromId)
+ {
// JBPNote DEBUG! THIS !
// dnaseq.transferAnnotation(aaseq, mp);
// aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
/*
* if we already hold a mapping between these sequences, just add to it
+ * note that 'adding' a duplicate map does nothing; this protects against
+ * creating duplicate mappings in AlignedCodonFrame
*/
for (SequenceToSequenceMapping ssm : mappings)
{
* otherwise, add a new sequence mapping
*/
Mapping mp = new Mapping(toSeq, map);
+ mp.setMappedFromId(mapFromId);
mappings.add(new SequenceToSequenceMapping(fromSeq, mp));
}
for (SequenceToSequenceMapping ssm : mappings)
{
- if (ssm.mapping.to == protein)
+ if (ssm.mapping.to == protein
+ && ssm.mapping.getMap().getFromRatio() == 3)
{
ml = ssm.mapping.map;
dnaSeq = ssm.fromSeq;
}
/**
- * Returns the first mapping found that is from 'fromSeq' to 'toSeq', or null
+ * Returns the first mapping found that is between 'fromSeq' and 'toSeq', or null
* if none found
*
* @param fromSeq
*/
public Mapping getMappingBetween(SequenceI fromSeq, SequenceI toSeq)
{
+ SequenceI dssFrom = fromSeq.getDatasetSequence() == null ? fromSeq
+ : fromSeq.getDatasetSequence();
+ SequenceI dssTo = toSeq.getDatasetSequence() == null ? toSeq : toSeq
+ .getDatasetSequence();
+
for (SequenceToSequenceMapping mapping : mappings)
{
SequenceI from = mapping.fromSeq;
SequenceI to = mapping.mapping.to;
- if ((from == fromSeq || from == fromSeq.getDatasetSequence())
- && (to == toSeq || to == toSeq.getDatasetSequence()))
+ if ((from == dssFrom && to == dssTo)
+ || (from == dssTo && to == dssFrom))
{
return mapping.mapping;
}
}
return null;
}
+
+ /**
+ * Returns a hashcode derived from the list of sequence mappings
+ *
+ * @see SequenceToSequenceMapping#hashCode()
+ * @see AbstractList#hashCode()
+ */
+ @Override
+ public int hashCode()
+ {
+ return this.mappings.hashCode();
+ }
+
+  /**
+   * Answers true if the given object is an AlignedCodonFrame whose list of
+   * mappings is equal to this one's, that is, it holds equal mappings in the
+   * same order
+   *
+   * @see SequenceToSequenceMapping#equals(Object)
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    return obj instanceof AlignedCodonFrame
+            && this.mappings.equals(((AlignedCodonFrame) obj).mappings);
+  }
+
+ /**
+ * Returns the list of sequence mappings held by this object. Note that this
+ * is the internal list itself, not a copy, so changes made to the returned
+ * list are changes to this object.
+ *
+ * @return the list of sequence to sequence mappings
+ */
+ public List<SequenceToSequenceMapping> getMappings()
+ {
+ return mappings;
+ }
}
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
*/
public class Alignment implements AlignmentI
{
- protected Alignment dataset;
+ private Alignment dataset;
protected List<SequenceI> sequences;
/*
* Share the same dataset sequence mappings (if any).
*/
- this.setCodonFrames(al.getCodonFrames());
+ if (dataset == null && al.getDataset() == null)
+ {
+ this.setCodonFrames(al.getCodonFrames());
+ }
}
/**
}
@Override
- public void setDataset(Alignment data)
+ public void setDataset(AlignmentI data)
{
if (dataset == null && data == null)
{
}
else if (dataset == null && data != null)
{
- dataset = data;
+ if (!(data instanceof Alignment))
+ {
+ throw new Error(
+ "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
+ }
+ dataset = (Alignment) data;
for (int i = 0; i < getHeight(); i++)
{
SequenceI currentSeq = getSequenceAt(i);
}
}
- /**
- * adds a set of mappings (while ignoring any duplicates)
- */
- @Override
- public void addCodonFrames(Iterable<AlignedCodonFrame> codons)
- {
- if (codons != null)
- {
- Iterator<AlignedCodonFrame> it = codons.iterator();
- while (it.hasNext())
- {
- addCodonFrame(it.next());
- }
- }
- }
-
/*
* (non-Javadoc)
*
@Override
public List<AlignedCodonFrame> getCodonFrames()
{
+ // TODO: Fix this method to fix failing AlignedCodonFrame tests
+ // this behaviour is currently incorrect. method should return codon frames
+ // for just the alignment,
+ // selected from dataset
return dataset != null ? dataset.getCodonFrames() : codonFrameList;
}
@Override
public void append(AlignmentI toappend)
{
- if (toappend == this)
- {
- System.err.println("Self append may cause a deadlock.");
- }
- // TODO test this method for a future 2.5 release
+ // TODO JAL-1270 needs test coverage
// currently tested for use in jalview.gui.SequenceFetcher
boolean samegap = toappend.getGapCharacter() == getGapCharacter();
char oldc = toappend.getGapCharacter();
.getFullAlignment().getSequences() : toappend.getSequences();
if (sqs != null)
{
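+ // avoid self append deadlock by collecting the sequences to add while
+ // synchronized on sqs, then adding them after the lock is released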
+ List<SequenceI> toappendsq = new ArrayList<SequenceI>();
synchronized (sqs)
{
for (SequenceI addedsq : sqs)
}
}
}
- addSequence(addedsq);
+ toappendsq.add(addedsq);
}
}
+ for (SequenceI addedsq : toappendsq)
+ {
+ addSequence(addedsq);
+ }
}
AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
for (int a = 0; alan != null && a < alan.length; a++)
addAnnotation(alan[a]);
}
+ // use add method
getCodonFrames().addAll(toappend.getCodonFrames());
List<SequenceGroup> sg = toappend.getGroups();
* Parameters control whether gaps in exon (mapped) and intron (unmapped)
* regions are preserved. Gaps that connect introns to exons are treated
* conservatively, i.e. only preserved if both intron and exon gaps are
- * preserved.
+ * preserved. TODO: check caveats below where the implementation fails
*
* @param al
+ * - must have same dataset, and sequences in al must have equivalent
+ * dataset sequence and start/end bounds under given mapping
* @param preserveMappedGaps
* if true, gaps within and between mapped codons are preserved
* @param preserveUnmappedGaps
{
return AlignmentUtils.alignProteinAsDna(this, al);
}
+ else if (thatIsProtein && thisIsNucleotide)
+ {
+ return AlignmentUtils.alignCdsAsProtein(this, al);
+ }
return AlignmentUtils.alignAs(this, al);
}
* @return Alignment containing dataset sequences or null if this is a
* dataset.
*/
- Alignment getDataset();
+ AlignmentI getDataset();
/**
* Set the associated dataset for the alignment, or create one.
* @param dataset
* The dataset alignment or null to construct one.
*/
- void setDataset(Alignment dataset);
+ void setDataset(AlignmentI dataset);
/**
* pads sequences with gaps (to ensure the set looks like an alignment)
void addCodonFrame(AlignedCodonFrame codons);
/**
- * add a set of aligned codons mappings for this alignment, apart from any
- * duplicates which are ignored
- *
- * @param codons
- */
- void addCodonFrames(Iterable<AlignedCodonFrame> codons);
-
- /**
* remove a particular codon frame reference from this alignment
*
* @param codons
* otherwise the versions have to match
*/
String otherVersion = other.getVersion();
+
if ((version == null || version.equals("0") || version.endsWith(":0"))
&& otherVersion != null)
{
}
else
{
- if (!version.equalsIgnoreCase(otherVersion))
+ if (version != null
+ && (otherVersion == null || !version
+ .equalsIgnoreCase(otherVersion)))
{
return false;
}
/**
* Defines internal constants for unambiguous annotation of DbRefEntry source
* strings and describing the data retrieved from external database sources (see
- * jalview.ws.DbSourcProxy)
+ * jalview.ws.DbSourceProxy) <br/>
+ * TODO: replace with ontology to allow recognition of particular attributes
+ * (e.g. protein coding, alignment (ortholog db, paralog db, domain db),
+ * genomic, transcriptomic, 3D structure providing (PDB, MODBASE, etc) ..).
*
* @author JimP
*
*/
package jalview.datamodel;
+import jalview.util.Comparison;
import jalview.util.MapList;
import java.util.Iterator;
int truePos = sequencePos - (start - 1);
while (alignedBases < truePos && alignedColumn < alignedSeq.length)
{
- if (alignedSeq[alignedColumn++] != gap)
+ char c = alignedSeq[alignedColumn++];
+ if (c != gap && !Comparison.isGap(c))
{
alignedBases++;
}
}
- /**
+ /*
* Contains the start-end pairs mapping from the associated sequence to the
* sequence in the database coordinate system. It also takes care of step
* difference between coordinate systems.
*/
MapList map = null;
- /**
+ /*
* The sequence that map maps the associated sequence to (if any).
*/
SequenceI to = null;
+ /*
+ * optional sequence id for the 'from' ranges
+ */
+ private String mappedFromId;
+
public Mapping(MapList map)
{
super();
map = new MapList(map2.map);
}
to = map2.to;
+ mappedFromId = map2.mappedFromId;
}
}
/**
* Equals that compares both the to references and MapList mappings.
*
- * @param other
+ * @param o
* @return
+ * @see MapList#equals
*/
@Override
public boolean equals(Object o)
{
- // TODO should override Object.hashCode() to ensure that equal objects have
- // equal hashcodes
if (o == null || !(o instanceof Mapping))
{
return false;
}
/**
+ * Returns a hashCode made from the mapped-to sequence ({@code to}) and the
+ * MapList ({@code map}). A null {@code to} contributes 1; a null {@code map}
+ * contributes nothing. The {@code mappedFromId} field is not included.
+ *
+ * @return the combined hash code
+ */
+ @Override
+ public int hashCode()
+ {
+ int hashCode = (this.to == null ? 1 : this.to.hashCode());
+ if (this.map != null)
+ {
+ hashCode = hashCode * 31 + this.map.hashCode();
+ }
+
+ return hashCode;
+ }
+
+ /**
* get the 'initial' position in the associated sequence for a position in the
* mapped reference frame
*
: this.to.getName());
}
+ /**
+ * Returns the identifier for the 'from' range sequence, or null if not set
+ *
+ * @return the 'from' sequence identifier, or null if not set
+ */
+ public String getMappedFromId()
+ {
+ return mappedFromId;
+ }
+
+ /**
+ * Sets the identifier for the 'from' range sequence
+ *
+ * @param mappedFromId
+ *          the identifier to store; it is stored as given, with no validation
+ */
+ public void setMappedFromId(String mappedFromId)
+ {
+ this.mappedFromId = mappedFromId;
+ }
+
}
return new Sequence(this);
}
+ private boolean _isNa;
+
+ private long _seqhash = 0;
+
+ @Override
+ public boolean isProtein()
+ {
+ if (datasetSequence != null)
+ {
+ return datasetSequence.isProtein(); // delegate to the dataset sequence when one is set
+ }
+ if (_seqhash != sequence.hashCode()) // re-classify only when the hash differs from the cached one
+ {
+ _seqhash = sequence.hashCode(); // NOTE(review): relies on hashCode() changing whenever the residues change - confirm for the type of 'sequence'
+ _isNa=jalview.util.Comparison.isNucleotide(new SequenceI[] { this }); // cache the nucleotide test for this sequence alone
+ }
+ return !_isNa; // protein is reported as "not nucleotide"
+ };
+
/*
* (non-Javadoc)
*
public int[] findPositionMap();
/**
+ * Answers whether this sequence is protein rather than nucleotide.
+ *
+ * @return true if sequence is composed of amino acid characters
+ */
+ public boolean isProtein();
+
+ /**
* Delete a range of aligned sequence columns, creating a new dataset sequence
* if necessary and adjusting start and end positions accordingly.
*
* DOCUMENT ME!
*
* @param i
- * DOCUMENT ME!
+ * alignment column number
* @param c
- * DOCUMENT ME!
+ * character to insert
*/
public void insertCharAt(int i, char c);
/**
- * DOCUMENT ME!
+ * insert given character at alignment column position
*
* @param position
- * DOCUMENT ME!
+ * alignment column number
+ * @param count
+ * length of insert
* @param ch
- * DOCUMENT ME!
+ * character to insert
*/
public void insertCharAt(int position, int count, char ch);
*/
public SequenceI getSequence(String sourceDb, List<SequenceI> peptides)
{
- SequenceI dna = new Sequence(sourceDb + "|" + accession,
- sequence.getSequence());
+ SequenceI dna = makeSequence(sourceDb);
dna.setDescription(description);
DBRefEntry retrievedref = new DBRefEntry(sourceDb,
getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
+ dna.setSourceDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
new int[] { 1, dna.getLength() }, 1, 1));
+
+ /*
+ * transform EMBL Database refs to canonical form
+ */
if (dbRefs != null)
{
for (DBRefEntry dbref : dbRefs)
{
+ dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource()));
dna.addDBRef(dbref);
}
}
{
for (EmblFeature feature : features)
{
- if (feature.dbRefs != null)
- {
- for (DBRefEntry dbref : feature.dbRefs)
- {
- dna.addDBRef(dbref);
- }
- }
if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))
{
parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
}
/**
+ * Constructs a new Sequence for this entry from the string
+ * {@code sourceDb + "|" + accession} (presumably used as the sequence name -
+ * confirm against the Sequence constructor) and the value returned by
+ * {@code sequence.getSequence()}.
+ *
+ * @param sourceDb
+ *          the source database name, used as the prefix before the '|'
+ * @return the newly constructed sequence
+ */
+ SequenceI makeSequence(String sourceDb)
+ {
+ SequenceI dna = new Sequence(sourceDb + "|" + accession,
+ sequence.getSequence());
+ return dna;
+ }
+
+ /**
* Extracts coding region and product from a CDS feature and properly decorate
* it with annotations.
*
* parent dna sequence for this record
* @param peptides
* list of protein product sequences for Embl entry
+ * @param matcher
+ * helper to match xrefs in already retrieved sequences
*/
void parseCodingFeature(EmblFeature feature, String sourceDb,
SequenceI dna, List<SequenceI> peptides, SequenceIdMatcher matcher)
{
boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
- int[] exon = getCdsRanges(feature);
+ int[] exons = getCdsRanges(feature);
- String prseq = null;
- String prname = "";
- String prid = null;
+ String translation = null;
+ String proteinName = "";
+ String proteinId = null;
Map<String, String> vals = new Hashtable<String, String>();
/*
if (qname.equals("translation"))
{
// remove all spaces (precompiled String.replaceAll(" ", ""))
- prseq = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll("");
+ translation = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll("");
}
else if (qname.equals("protein_id"))
{
- prid = q.getValues()[0].trim();
+ proteinId = q.getValues()[0].trim();
}
else if (qname.equals("codon_start"))
{
else if (qname.equals("product"))
{
// sometimes name is returned e.g. for V00488
- prname = q.getValues()[0].trim();
+ proteinName = q.getValues()[0].trim();
}
else
{
}
}
- DBRefEntry protEMBLCDS = null;
- exon = MappingUtils.removeStartPositions(codonStart - 1, exon);
- boolean noProteinDbref = true;
+ DBRefEntry proteinToEmblProteinRef = null;
+ exons = MappingUtils.removeStartPositions(codonStart - 1, exons);
SequenceI product = null;
- Mapping map = null;
- if (prseq != null && prname != null && prid != null)
+ Mapping dnaToProteinMapping = null;
+ if (translation != null && proteinName != null && proteinId != null)
{
+ int translationLength = translation.length();
+
/*
* look for product in peptides list, if not found, add it
*/
- product = matcher.findIdMatch(prid);
+ product = matcher.findIdMatch(proteinId);
if (product == null)
{
- product = new Sequence(prid, prseq, 1, prseq.length());
- product.setDescription(((prname.length() == 0) ? "Protein Product from "
+ product = new Sequence(proteinId, translation, 1, translationLength);
+ product.setDescription(((proteinName.length() == 0) ? "Protein Product from "
+ sourceDb
- : prname));
+ : proteinName));
peptides.add(product);
matcher.add(product);
}
// we have everything - create the mapping and perhaps the protein
// sequence
- if (exon == null || exon.length == 0)
+ if (exons == null || exons.length == 0)
{
+ /*
+ * workaround until we handle dna location for CDS sequence
+ * e.g. location="X53828.1:60..1058" correctly
+ */
System.err
.println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("
+ sourceDb + ":" + getAccession() + ")");
- if (prseq.length() * 3 == (1 - codonStart + dna.getSequence().length))
+ if (translationLength * 3 == (1 - codonStart + dna.getSequence().length))
{
System.err
.println("Not allowing for additional stop codon at end of cDNA fragment... !");
- // this might occur for CDS sequences where no features are
- // marked.
- exon = new int[] { dna.getStart() + (codonStart - 1),
+ // this might occur for CDS sequences where no features are marked
+ exons = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() };
- map = new Mapping(product, exon, new int[] { 1, prseq.length() },
- 3, 1);
+ dnaToProteinMapping = new Mapping(product, exons, new int[] { 1,
+ translationLength }, 3, 1);
}
- if ((prseq.length() + 1) * 3 == (1 - codonStart + dna.getSequence().length))
+ if ((translationLength + 1) * 3 == (1 - codonStart + dna
+ .getSequence().length))
{
System.err
.println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");
- exon = new int[] { dna.getStart() + (codonStart - 1),
+ exons = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() - 3 };
- map = new Mapping(product, exon, new int[] { 1, prseq.length() },
- 3, 1);
+ dnaToProteinMapping = new Mapping(product, exons, new int[] { 1,
+ translationLength }, 3, 1);
}
}
else
else
{
// final product length truncation check
- // TODO should from range include stop codon even if not in protein
- // in order to include stop codon in CDS sequence (as done for
- // Ensembl)?
- int[] cdsRanges = adjustForProteinLength(prseq.length(), exon);
- map = new Mapping(product, cdsRanges, new int[] { 1,
- prseq.length() }, 3, 1);
- // reconstruct the EMBLCDS entry
- // TODO: this is only necessary when there codon annotation is
- // complete (I think JBPNote)
- DBRefEntry pcdnaref = new DBRefEntry();
- pcdnaref.setAccessionId(prid);
- pcdnaref.setSource(DBRefSource.EMBLCDS);
- pcdnaref.setVersion(getSequenceVersion()); // same as parent EMBL
- // version.
- MapList mp = new MapList(new int[] { 1, prseq.length() },
- new int[] { 1 + (codonStart - 1),
- (codonStart - 1) + 3 * prseq.length() }, 1, 3);
- pcdnaref.setMap(new Mapping(mp));
+ int[] cdsRanges = adjustForProteinLength(translationLength, exons);
+ dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] {
+ 1, translationLength }, 3, 1);
if (product != null)
{
- product.addDBRef(pcdnaref);
- protEMBLCDS = new DBRefEntry(pcdnaref);
- protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
- product.addDBRef(protEMBLCDS);
+ /*
+ * make xref with mapping from protein to EMBL dna
+ */
+ DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL,
+ getSequenceVersion(), proteinId, new Mapping(
+ dnaToProteinMapping.getMap().getInverse()));
+ product.addDBRef(proteinToEmblRef);
+
+ /*
+ * make xref from protein to EMBLCDS; we assume here that the
+ * CDS sequence version is same as dna sequence (?!)
+ */
+ MapList proteinToCdsMapList = new MapList(new int[] { 1,
+ translationLength }, new int[] { 1 + (codonStart - 1),
+ (codonStart - 1) + 3 * translationLength }, 1, 3);
+ DBRefEntry proteinToEmblCdsRef = new DBRefEntry(
+ DBRefSource.EMBLCDS, getSequenceVersion(), proteinId,
+ new Mapping(proteinToCdsMapList));
+ product.addDBRef(proteinToEmblCdsRef);
+
+ /*
+ * make 'direct' xref from protein to EMBLCDSPROTEIN
+ */
+ proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef);
+ proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct);
+ proteinToEmblProteinRef.setMap(null);
+ product.addDBRef(proteinToEmblProteinRef);
}
}
}
- // add cds feature to dna seq - this may include the stop codon
- for (int xint = 0; exon != null && xint < exon.length; xint += 2)
+
+ /*
+ * add cds features to dna sequence
+ */
+ for (int xint = 0; exons != null && xint < exons.length; xint += 2)
{
- SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals,
- codonStart);
+ SequenceFeature sf = makeCdsFeature(exons, xint, proteinName,
+ proteinId, vals, codonStart);
sf.setType(feature.getName()); // "CDS"
sf.setEnaLocation(feature.getLocation());
sf.setFeatureGroup(sourceDb);
}
/*
- * add dbRefs to sequence, and mappings for Uniprot xrefs
+ * add feature dbRefs to sequence, and mappings for Uniprot xrefs
*/
+ boolean hasUniprotDbref = false;
if (feature.dbRefs != null)
{
boolean mappingUsed = false;
for (DBRefEntry ref : feature.dbRefs)
{
- ref.setSource(DBRefUtils.getCanonicalName(ref.getSource()));
- if (ref.getSource().equals(DBRefSource.UNIPROT))
+ /*
+ * ensure UniProtKB/Swiss-Prot converted to UNIPROT
+ */
+ String source = DBRefUtils.getCanonicalName(ref.getSource());
+ ref.setSource(source);
+ DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref
+ .getAccessionId());
+ if (source.equals(DBRefSource.UNIPROT))
{
String proteinSeqName = DBRefSource.UNIPROT + "|"
+ ref.getAccessionId();
- if (map != null && map.getTo() != null)
+ if (dnaToProteinMapping != null && dnaToProteinMapping.getTo() != null)
{
if (mappingUsed)
{
* two or more Uniprot xrefs for the same CDS -
* each needs a distinct Mapping (as to a different sequence)
*/
- map = new Mapping(map);
+ dnaToProteinMapping = new Mapping(dnaToProteinMapping);
}
mappingUsed = true;
/*
* try to locate the protein mapped to (possibly by a
- * previous CDS feature)
+ * previous CDS feature); if not found, construct it from
+ * the EMBL translation
*/
SequenceI proteinSeq = matcher.findIdMatch(proteinSeqName);
if (proteinSeq == null)
matcher.add(proteinSeq);
peptides.add(proteinSeq);
}
- map.setTo(proteinSeq);
- map.getTo().addDBRef(
- new DBRefEntry(ref.getSource(), ref.getVersion(), ref
- .getAccessionId()));
- ref.setMap(map);
+ dnaToProteinMapping.setTo(proteinSeq);
+ dnaToProteinMapping.setMappedFromId(proteinId);
+ proteinSeq.addDBRef(proteinDbRef);
+ proteinSeq.setSourceDBRef(proteinDbRef);
+ ref.setMap(dnaToProteinMapping);
}
- noProteinDbref = false;
+ hasUniprotDbref = true;
}
if (product != null)
{
- DBRefEntry pref = new DBRefEntry(ref.getSource(),
- ref.getVersion(), ref.getAccessionId());
+ /*
+ * copy feature dbref to our protein product
+ */
+ DBRefEntry pref = proteinDbRef;
pref.setMap(null); // reference is direct
product.addDBRef(pref);
// Add converse mapping reference
- if (map != null)
+ if (dnaToProteinMapping != null)
{
- Mapping pmap = new Mapping(dna, map.getMap().getInverse());
+ Mapping pmap = new Mapping(dna, dnaToProteinMapping.getMap()
+ .getInverse());
pref = new DBRefEntry(sourceDb, getSequenceVersion(),
this.getAccession());
pref.setMap(pmap);
- if (map.getTo() != null)
+ if (dnaToProteinMapping.getTo() != null)
{
- map.getTo().addDBRef(pref);
+ dnaToProteinMapping.getTo().addDBRef(pref);
}
}
}
dna.addDBRef(ref);
}
- if (noProteinDbref && product != null)
+ }
+
+ /*
+ * if we have a product (translation) but no explicit Uniprot dbref
+ * (example: EMBL AAFI02000057 protein_id EAL65544.1)
+ * then construct mappings to an assumed EMBLCDSPROTEIN accession
+ */
+ if (!hasUniprotDbref && product != null)
+ {
+ if (proteinToEmblProteinRef == null)
{
- // add protein coding reference to dna sequence so xref matches
- if (protEMBLCDS == null)
- {
- protEMBLCDS = new DBRefEntry();
- protEMBLCDS.setAccessionId(prid);
- protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
- protEMBLCDS.setVersion(getSequenceVersion());
- protEMBLCDS
- .setMap(new Mapping(product, map.getMap().getInverse()));
- }
- product.addDBRef(protEMBLCDS);
+ // assuming CDSPROTEIN sequence version = dna version (?!)
+ proteinToEmblProteinRef = new DBRefEntry(
+ DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
+ }
+ product.addDBRef(proteinToEmblProteinRef);
+ product.setSourceDBRef(proteinToEmblProteinRef);
- // Add converse mapping reference
- if (map != null)
- {
- Mapping pmap = new Mapping(product, protEMBLCDS.getMap().getMap()
- .getInverse());
- DBRefEntry ncMap = new DBRefEntry(protEMBLCDS);
- ncMap.setMap(pmap);
- if (map.getTo() != null)
- {
- dna.addDBRef(ncMap);
- }
- }
+ if (dnaToProteinMapping != null
+ && dnaToProteinMapping.getTo() != null)
+ {
+ DBRefEntry dnaToEmblProteinRef = new DBRefEntry(
+ DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
+ dnaToEmblProteinRef.setMap(dnaToProteinMapping);
+ dnaToProteinMapping.setMappedFromId(proteinId);
+ dna.addDBRef(dnaToEmblProteinRef);
}
}
}
}
/**
- * truncate the last exon interval to the prlength'th codon
+ * Truncates (if necessary) the exon intervals to match 3 times the length of
+ * the protein; also accepts 3 bases longer (for stop codon not included in
+ * protein)
*
- * @param prlength
+ * @param proteinLength
* @param exon
- * @return new exon
+ * an array of [start, end, start, end...] intervals
+ * @return the same array (if unchanged) or a truncated copy
*/
- static int[] adjustForProteinLength(int prlength, int[] exon)
+ static int[] adjustForProteinLength(int proteinLength, int[] exon)
{
- if (prlength <= 0 || exon == null)
+ if (proteinLength <= 0 || exon == null)
{
return exon;
}
- int desiredCdsLength = prlength * 3;
+ int expectedCdsLength = proteinLength * 3;
int exonLength = MappingUtils.getLength(Arrays.asList(exon));
/*
- * assuming here exon might include stop codon in addition to protein codons
+ * if exon length matches protein, or is shorter, or longer by the
+ * length of a stop codon (3 bases), then leave it unchanged
*/
- if (desiredCdsLength == exonLength
- || desiredCdsLength == exonLength - 3)
+ if (expectedCdsLength >= exonLength
+ || expectedCdsLength == exonLength - 3)
{
return exon;
}
for (int x = 0; x < exon.length; x += 2)
{
cdspos += Math.abs(exon[x + 1] - exon[x]) + 1;
- if (desiredCdsLength <= cdspos)
+ if (expectedCdsLength <= cdspos)
{
// advanced beyond last codon.
sxpos = x;
- if (desiredCdsLength != cdspos)
+ if (expectedCdsLength != cdspos)
{
// System.err
// .println("Truncating final exon interval on region by "
*/
if (exon[x + 1] >= exon[x])
{
- endxon = exon[x + 1] - cdspos + desiredCdsLength;
+ endxon = exon[x + 1] - cdspos + expectedCdsLength;
}
else
{
- endxon = exon[x + 1] + cdspos - desiredCdsLength;
+ endxon = exon[x + 1] + cdspos - expectedCdsLength;
}
break;
}
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
for (DBRefEntry xref : xrefs)
{
seq.addDBRef(xref);
- /*
- * Save any Uniprot xref to be the reference for SIFTS mapping
- */
- if (DBRefSource.UNIPROT.equals(xref.getSource()))
- {
- seq.setSourceDBRef(xref);
- }
}
/*
DBRefEntry self = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), seq.getName());
seq.addDBRef(self);
+ seq.setSourceDBRef(self);
}
/**
if (ids.contains(name)
|| ids.contains(name.replace("ENSP", "ENST")))
{
- DBRefUtils.parseToDbRef(sq, getDbSource(),
+ DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
getEnsemblDataVersion(), name);
+ sq.setSourceDBRef(dbref);
}
}
if (alignment == null)
}
/**
- * Searches selected sequences for xRef products and builds the Show
- * Cross-References menu (formerly called Show Products)
+ * Searches the alignment sequences for xRefs and builds the Show
+ * Cross-References menu (formerly called Show Products), with database
+ * sources for which cross-references are found (protein sources for a
+ * nucleotide alignment and vice versa)
*
- * @return true if Show Cross-references menu should be enabled.
+ * @return true if Show Cross-references menu should be enabled
*/
public boolean canShowProducts()
{
- SequenceI[] selection = viewport.getSequenceSelection();
+ SequenceI[] seqs = viewport.getAlignment().getSequencesArray();
AlignmentI dataset = viewport.getAlignment().getDataset();
boolean showp = false;
try
{
showProducts.removeAll();
final boolean dna = viewport.getAlignment().isNucleotide();
- String[] ptypes = (selection == null || selection.length == 0) ? null
- : CrossRef.findSequenceXrefTypes(dna, selection, dataset);
+ List<String> ptypes = (seqs == null || seqs.length == 0) ? null
+ : new CrossRef(seqs, dataset)
+ .findXrefSourcesForSequences(dna);
- for (int t = 0; ptypes != null && t < ptypes.length; t++)
+ for (final String source : ptypes)
{
showp = true;
final AlignFrame af = this;
- final String source = ptypes[t];
- JMenuItem xtype = new JMenuItem(ptypes[t]);
+ JMenuItem xtype = new JMenuItem(source);
xtype.addActionListener(new ActionListener()
{
-
@Override
public void actionPerformed(ActionEvent e)
{
showProductsFor(af.viewport.getSequenceSelection(), dna, source);
}
-
});
showProducts.add(xtype);
}
showProducts.setEnabled(showp);
} catch (Exception e)
{
- jalview.bin.Cache.log
+ Cache.log
.warn("canShowProducts threw an exception - please report to help@jalview.org",
e);
return false;
* @param source
* the database to show cross-references for
*/
- protected void showProductsFor(final SequenceI[] sel, final boolean dna,
+ protected void showProductsFor(final SequenceI[] sel, final boolean _odna,
final String source)
{
Runnable foo = new Runnable()
{
AlignmentI alignment = AlignFrame.this.getViewport()
.getAlignment();
- AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source,
- alignment);
- if (xrefs != null)
+ AlignmentI dataset = alignment.getDataset() == null ? alignment
+ : alignment.getDataset();
+ boolean dna = alignment.isNucleotide();
+ if (_odna!=dna)
{
- /*
- * get display scheme (if any) to apply to features
- */
- FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
- .getFeatureColourScheme(source);
+ System.err
+ .println("Conflict: showProducts for alignment originally "
+ + "thought to be "
+ + (_odna ? "DNA" : "Protein")
+ + " now searching for "
+ + (dna ? "DNA" : "Protein") + " Context.");
+ }
+ AlignmentI xrefs = new CrossRef(sel, dataset)
+ .findXrefSequences(source, dna);
+ if (xrefs == null)
+ {
+ return;
+ }
+ /*
+ * get display scheme (if any) to apply to features
+ */
+ FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
+ .getFeatureColourScheme(source);
- AlignmentI al = makeCrossReferencesAlignment(
- alignment.getDataset(), xrefs);
+ AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
+ xrefs);
+ if (!dna)
+ {
+ xrefsAlignment = AlignmentUtils.makeCdsAlignment(
+ xrefsAlignment.getSequencesArray(), dataset, sel);
+ xrefsAlignment.alignAs(alignment);
+ }
+
+ AlignFrame newFrame = new AlignFrame(xrefsAlignment, DEFAULT_WIDTH,
+ DEFAULT_HEIGHT);
+ if (Cache.getDefault("HIDE_INTRONS", true))
+ {
+ newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
+ }
+ String newtitle = String.format("%s %s %s", MessageManager
+ .getString(dna ? "label.proteins" : "label.nucleotides"),
+ MessageManager.getString("label.for"), getTitle());
+ newFrame.setTitle(newtitle);
- AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH,
+ if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
+ {
+ /*
+ * split frame display is turned off in preferences file
+ */
+ Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
DEFAULT_HEIGHT);
- if (Cache.getDefault("HIDE_INTRONS", true))
- {
- newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
- }
- String newtitle = String.format("%s %s %s",
- MessageManager.getString(dna ? "label.proteins"
- : "label.nucleotides"), MessageManager
- .getString("label.for"), getTitle());
- newFrame.setTitle(newtitle);
+ return; // via finally clause
+ }
- if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
+ /*
+ * Make a copy of this alignment (sharing the same dataset
+ * sequences). If we are DNA, drop introns and update mappings
+ */
+ AlignmentI copyAlignment = null;
+ boolean copyAlignmentIsAligned = false;
+ if (dna)
+ {
+ copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
+ xrefsAlignment.getSequencesArray());
+ if (copyAlignment.getHeight() == 0)
{
- /*
- * split frame display is turned off in preferences file
- */
- Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
- DEFAULT_HEIGHT);
- return; // via finally clause
+ System.err.println("Failed to make CDS alignment");
}
/*
- * Make a copy of this alignment (sharing the same dataset
- * sequences). If we are DNA, drop introns and update mappings
+ * pending getting Embl transcripts to 'align',
+ * we are only doing this for Ensembl
*/
- AlignmentI copyAlignment = null;
- final SequenceI[] sequenceSelection = AlignFrame.this.viewport
- .getSequenceSelection();
- List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
- boolean copyAlignmentIsAligned = false;
- if (dna)
+ // TODO proper criteria for 'can align as cdna'
+ if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+ || AlignmentUtils.looksLikeEnsembl(alignment))
{
- copyAlignment = AlignmentUtils.makeCdsAlignment(
- sequenceSelection, cf, alignment);
- if (copyAlignment.getHeight() == 0)
- {
- System.err.println("Failed to make CDS alignment");
- }
- al.getCodonFrames().clear();
- al.addCodonFrames(copyAlignment.getCodonFrames());
- al.addCodonFrames(cf);
-
- /*
- * pending getting Embl transcripts to 'align',
- * we are only doing this for Ensembl
- */
- // TODO proper criteria for 'can align as cdna'
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
- || AlignmentUtils.looksLikeEnsembl(alignment))
- {
- copyAlignment.alignAs(alignment);
- copyAlignmentIsAligned = true;
- }
+ copyAlignment.alignAs(alignment);
+ copyAlignmentIsAligned = true;
}
- else
- {
- copyAlignment = AlignmentUtils.makeCopyAlignment(
- sequenceSelection, xrefs.getSequencesArray());
- copyAlignment.addCodonFrames(cf);
- al.addCodonFrames(copyAlignment.getCodonFrames());
- al.addCodonFrames(cf);
- }
- copyAlignment.setGapCharacter(AlignFrame.this.viewport
- .getGapCharacter());
+ }
+ else
+ {
+ copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
+ xrefs.getSequencesArray(), dataset);
+ }
+ copyAlignment.setGapCharacter(AlignFrame.this.viewport
+ .getGapCharacter());
- StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
- ssm.registerMappings(cf);
+ StructureSelectionManager ssm = StructureSelectionManager
+ .getStructureSelectionManager(Desktop.instance);
- if (copyAlignment.getHeight() <= 0)
- {
- System.err.println("No Sequences generated for xRef type "
- + source);
- return;
- }
+ /*
+ * register any new mappings for sequence mouseover etc
+ * (will not duplicate any previously registered mappings)
+ */
+ ssm.registerMappings(dataset.getCodonFrames());
+
+ if (copyAlignment.getHeight() <= 0)
+ {
+ System.err.println("No Sequences generated for xRef type "
+ + source);
+ return;
+ }
+ /*
+ * align protein to dna
+ */
+ if (dna && copyAlignmentIsAligned)
+ {
+ xrefsAlignment.alignAs(copyAlignment);
+ }
+ else
+ {
/*
- * align protein to dna
+ * align cdna to protein - currently only if
+ * fetching and aligning Ensembl transcripts!
*/
- if (dna && copyAlignmentIsAligned)
+ // TODO: generalise for other sources of locus/transcript/cds data
+ if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
{
- al.alignAs(copyAlignment);
- }
- else
- {
- /*
- * align cdna to protein - currently only if
- * fetching and aligning Ensembl transcripts!
- */
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source))
- {
- copyAlignment.alignAs(al);
- }
+ copyAlignment.alignAs(xrefsAlignment);
}
+ }
- AlignFrame copyThis = new AlignFrame(copyAlignment,
- AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
- copyThis.setTitle(AlignFrame.this.getTitle());
+ AlignFrame copyThis = new AlignFrame(copyAlignment,
+ AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+ copyThis.setTitle(AlignFrame.this.getTitle());
- boolean showSequenceFeatures = viewport
- .isShowSequenceFeatures();
- newFrame.setShowSeqFeatures(showSequenceFeatures);
- copyThis.setShowSeqFeatures(showSequenceFeatures);
- FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer();
+ boolean showSequenceFeatures = viewport.isShowSequenceFeatures();
+ newFrame.setShowSeqFeatures(showSequenceFeatures);
+ copyThis.setShowSeqFeatures(showSequenceFeatures);
+ FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas
+ .getFeatureRenderer();
- /*
- * copy feature rendering settings to split frame
- */
- newFrame.alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer()
- .transferSettings(myFeatureStyling);
- copyThis.alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer()
- .transferSettings(myFeatureStyling);
+ /*
+ * copy feature rendering settings to split frame
+ */
+ newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+ .transferSettings(myFeatureStyling);
+ copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+ .transferSettings(myFeatureStyling);
- /*
- * apply 'database source' feature configuration
- * if any was found
- */
- // TODO is this the feature colouring for the original
- // alignment or the fetched xrefs? either could be Ensembl
- newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
- copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
-
- SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
- dna ? newFrame : copyThis);
- newFrame.setVisible(true);
- copyThis.setVisible(true);
- String linkedTitle = MessageManager
- .getString("label.linked_view_title");
- Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
- sf.adjustDivider();
- }
- } catch (Exception e)
- {
- Cache.log.error("Exception when finding crossreferences", e);
+ /*
+ * apply 'database source' feature configuration
+ * if any was found
+ */
+ // TODO is this the feature colouring for the original
+ // alignment or the fetched xrefs? either could be Ensembl
+ newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
+ copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
+
+ SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
+ dna ? newFrame : copyThis);
+ newFrame.setVisible(true);
+ copyThis.setVisible(true);
+ String linkedTitle = MessageManager
+ .getString("label.linked_view_title");
+ Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+ sf.adjustDivider();
} catch (OutOfMemoryError e)
{
new OOMWarning("whilst fetching crossreferences", e);
}
/**
- * Makes an alignment containing the given sequences. If this is of the
- * same type as the given dataset (nucleotide/protein), then the new
- * alignment shares the same dataset, and its dataset sequences are added
- * to it. Otherwise a new dataset sequence is created for the
- * cross-references.
+ * Makes an alignment containing the given sequences, and adds them to the
+ * given dataset, which is also set as the dataset for the new alignment
+ *
+ * TODO: refactor to DatasetI method
*
* @param dataset
* @param seqs
protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
AlignmentI seqs)
{
- boolean sameType = dataset.isNucleotide() == seqs.isNucleotide();
-
SequenceI[] sprods = new SequenceI[seqs.getHeight()];
for (int s = 0; s < sprods.length; s++)
{
sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
- if (sameType)
+ if (dataset.getSequences() == null
+ || !dataset.getSequences().contains(
+ sprods[s].getDatasetSequence()))
{
- if (dataset.getSequences() == null
- || !dataset.getSequences().contains(
- sprods[s].getDatasetSequence()))
- {
- dataset.addSequence(sprods[s].getDatasetSequence());
- }
+ dataset.addSequence(sprods[s].getDatasetSequence());
}
sprods[s].updatePDBIds();
}
Alignment al = new Alignment(sprods);
- if (sameType)
- {
- al.setDataset((Alignment) dataset);
- }
- else
- {
- al.createDatasetAlignment();
- }
+ al.setDataset(dataset);
return al;
}
SequenceI[] orderedSeqs = tmpseqs
.toArray(new SequenceI[tmpseqs.size()]);
- Alignment al = new Alignment(orderedSeqs);
+ AlignmentI al = new Alignment(orderedSeqs);
if (referenceseqForView != null)
{
}
AlignFrame loadViewport(String file, JSeq[] JSEQ,
- List<SequenceI> hiddenSeqs, Alignment al,
+ List<SequenceI> hiddenSeqs, AlignmentI al,
JalviewModelSequence jms, Viewport view, String uniqueSeqSetId,
String viewId, List<JvAnnotRow> autoAlan)
{
}
private ColourSchemeI constructAnnotationColour(
- AnnotationColours viewAnnColour, AlignFrame af, Alignment al,
+ AnnotationColours viewAnnColour, AlignFrame af, AlignmentI al,
JalviewModelSequence jms, boolean checkGroupAnnColour)
{
boolean propagateAnnColour = false;
return cs;
}
- private void reorderAutoannotation(AlignFrame af, Alignment al,
+ private void reorderAutoannotation(AlignFrame af, AlignmentI al,
List<JvAnnotRow> autoAlan)
{
// copy over visualization settings for autocalculated annotation in the
}
}
- private void recoverDatasetFor(SequenceSet vamsasSet, Alignment al,
+ private void recoverDatasetFor(SequenceSet vamsasSet, AlignmentI al,
boolean ignoreUnrefed)
{
- jalview.datamodel.Alignment ds = getDatasetFor(vamsasSet.getDatasetId());
+ jalview.datamodel.AlignmentI ds = getDatasetFor(vamsasSet
+ .getDatasetId());
Vector dseqs = null;
if (ds == null)
{
* TODO use AlignmentI here and in related methods - needs
* AlignmentI.getDataset() changed to return AlignmentI instead of Alignment
*/
- Hashtable<String, Alignment> datasetIds = null;
+ Hashtable<String, AlignmentI> datasetIds = null;
- IdentityHashMap<Alignment, String> dataset2Ids = null;
+ IdentityHashMap<AlignmentI, String> dataset2Ids = null;
- private Alignment getDatasetFor(String datasetId)
+ private AlignmentI getDatasetFor(String datasetId)
{
if (datasetIds == null)
{
- datasetIds = new Hashtable<String, Alignment>();
+ datasetIds = new Hashtable<String, AlignmentI>();
return null;
}
if (datasetIds.containsKey(datasetId))
return null;
}
- private void addDatasetRef(String datasetId, Alignment dataset)
+ private void addDatasetRef(String datasetId, AlignmentI dataset)
{
if (datasetIds == null)
{
- datasetIds = new Hashtable<String, Alignment>();
+ datasetIds = new Hashtable<String, AlignmentI>();
}
datasetIds.put(datasetId, dataset);
}
* @param dataset
* @return
*/
- private String getDatasetIdRef(Alignment dataset)
+ private String getDatasetIdRef(AlignmentI dataset)
{
if (dataset.getDataset() != null)
{
// make a new datasetId and record it
if (dataset2Ids == null)
{
- dataset2Ids = new IdentityHashMap<Alignment, String>();
+ dataset2Ids = new IdentityHashMap<AlignmentI, String>();
}
else
{
package jalview.gui;
import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.SeqCigar;
{
// AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]);
- Alignment al = new Alignment((SequenceI[]) alAndColsel[0]);
- Alignment dataset = (av != null && av.getAlignment() != null) ? av
+ AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]);
+ AlignmentI dataset = (av != null && av.getAlignment() != null) ? av
.getAlignment().getDataset() : null;
if (dataset != null)
{
Cache.log.info(
"Error retrieving " + accession
+ " from " + proxy.getDbName(), e);
- } finally
- {
- return success;
}
+ return success;
}
/**
for (String q : queries)
{
- DBRefEntry[] found = null;
DBRefEntry dbr = new DBRefEntry();
dbr.setSource(proxy.getDbSource());
dbr.setVersion(null);
{
if (rs[r] != null)
{
- found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
- if (found != null && found.length > 0)
+ List<DBRefEntry> found = DBRefUtils.searchRefs(rs[r].getDBRefs(),
+ accId);
+ if (!found.isEmpty())
{
rfound = true;
break;
final AlignmentI bottomAlignment = bottomViewport.getAlignment();
boolean topAnnotations = topViewport.isShowAnnotation();
boolean bottomAnnotations = bottomViewport.isShowAnnotation();
+ // TODO need number of visible sequences here, not #sequences - how?
int topCount = topAlignment.getHeight();
int bottomCount = bottomAlignment.getHeight();
int topCharHeight = topViewport.getViewStyle().getCharHeight();
+ (bottomAnnotations ? bottomViewport.calcPanelHeight() : 0);
double ratio = ((double) topHeight) / (topHeight + bottomHeight);
+ /*
+ * limit to 0.2 <= ratio <= 0.8 to avoid concealing all sequences
+ */
+ ratio = Math.min(ratio, 0.8d);
+ ratio = Math.max(ratio, 0.2d);
setRelativeDividerLocation(ratio);
}
{
// AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]);
- Alignment al = new Alignment((SequenceI[]) alAndColsel[0]);
- Alignment dataset = (av != null && av.getAlignment() != null) ? av
+ AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]);
+ AlignmentI dataset = (av != null && av.getAlignment() != null) ? av
.getAlignment().getDataset() : null;
if (dataset != null)
{
}
/**
+ * Overloaded method signature to test whether a single sequence is nucleotide
+ * (that is, more than 85% CGTA)
+ *
+ * @param seq
+ * @return
+ */
+ public static final boolean isNucleotide(SequenceI seq)
+ {
+ return isNucleotide(new SequenceI[] { seq });
+ }
+
+ /**
* Answers true if more than 85% of the sequence residues (ignoring gaps) are
* A, G, C, T or U, else false. This is just a heuristic guess and may give a
* wrong answer (as AGCT are also amino acid codes).
}
/**
+ * Returns those DBRefEntry objects whose source identifier (once converted to
+ * Jalview's canonical form) is in the list of sources to search for. Returns
+ * null if no matches found.
*
* @param dbrefs
- * array of DBRef objects to search
+ * DBRefEntry objects to search
* @param sources
- * String[] array of source DBRef IDs to retrieve
+ * array of sources to select
* @return
*/
public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
}
/**
- * Returns an array of those references that match the given entry, or null if
- * no matches. Currently uses a comparator which matches if
+ * Returns a (possibly empty) list of those references that match the given
+ * entry. Currently uses a comparator which matches if
* <ul>
* <li>database sources are the same</li>
* <li>accession ids are the same</li>
* pattern to match
* @return
*/
- public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry)
+ public static List<DBRefEntry> searchRefs(DBRefEntry[] ref,
+ DBRefEntry entry)
{
return searchRefs(ref, entry,
matchDbAndIdAndEitherMapOrEquivalentMapList);
}
/**
- * Returns an array of those references that match the given accession id
+ * Returns a list of those references that match the given accession id
* <ul>
* <li>database sources are the same</li>
* <li>accession ids are the same</li>
* <li>both have no mapping, or the mappings are the same</li>
* </ul>
*
- * @param ref
+ * @param refs
* Set of references to search
- * @param entry
- * pattern to match
+ * @param accId
+ * accession id to match
* @return
*/
- public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+ public static List<DBRefEntry> searchRefs(DBRefEntry[] refs, String accId)
{
- return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+ return searchRefs(refs, new DBRefEntry("", "", accId), matchId);
}
/**
- * Returns an array of those references that match the given entry, according
- * to the given comparator. Returns null if no matches.
+ * Returns a (possibly empty) list of those references that match the given
+ * entry, according to the given comparator.
*
* @param refs
* an array of database references to search
* @param comparator
* @return
*/
- static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry,
+ static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
DbRefComp comparator)
{
+ List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
if (refs == null || entry == null)
{
- return null;
+ return rfs;
}
- List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
for (int i = 0; i < refs.length; i++)
{
if (comparator.matches(entry, refs[i]))
rfs.add(refs[i]);
}
}
- return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]);
+ return rfs;
}
interface DbRefComp
};
/**
- * accession ID and DB must be identical. Version is ignored. No map on either
- * or map but no maplist on either or maplist of map on a is equivalent to the
- * maplist of map on b.
+ * accession ID and DB must be identical, or null on a. Version is ignored. No
+ * map on either or map but no maplist on either or maplist of map on a is
+ * equivalent to the maplist of map on b.
*/
public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
{
&& refb.getSource().equals(refa.getSource()))
{
// We dont care about version
- if (refa.getAccessionId() != null && refb.getAccessionId() != null
- && refb.getAccessionId().equals(refa.getAccessionId()))
+
+ if (refa.getAccessionId() == null
+ || refa.getAccessionId().equals(refb.getAccessionId()))
{
if (refa.getMap() == null || refb.getMap() == null)
{
|| (refb.getMap().getMap() != null
&& refa.getMap().getMap() != null && (refb
.getMap().getMap().equals(refa.getMap().getMap()))))
- { // getMap().getMap().containsEither(false,refa.getMap().getMap())
+ {
return true;
}
}
return (o1 == null ? o2.equals(o1) : o1.equals(o2));
}
+ /**
+ * Filters a set of references, keeping only those whose source is one of the
+ * 'standard' DNA coding databases or one of the 'standard' protein
+ * databases. The filtering itself is delegated to selectRefs.
+ *
+ * @param selectDna
+ *          true to keep references to DBRefSource.DNACODINGDBS, false to
+ *          keep references to DBRefSource.PROTEINDBS
+ * @param refs
+ *          the references to filter
+ * @return whatever selectRefs returns for the chosen source list
+ */
+ public static DBRefEntry[] selectDbRefs(boolean selectDna,
+         DBRefEntry[] refs)
+ {
+   // could attempt to find other cross
+   // refs here - ie PDB xrefs
+   // (not dna, not protein seq)
+   if (selectDna)
+   {
+     return selectRefs(refs, DBRefSource.DNACODINGDBS);
+   }
+   return selectRefs(refs, DBRefSource.PROTEINDBS);
+ }
+
+ /**
+ * Returns the (possibly empty) list of those supplied dbrefs which have the
+ * specified source database, with a case-insensitive match of source name
+ *
+ * @param dbRefs
+ * @param source
+ * @return
+ */
+ public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
+ String source)
+ {
+ List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
+ if (dbRefs != null && source != null)
+ {
+ for (DBRefEntry dbref : dbRefs)
+ {
+ if (source.equalsIgnoreCase(dbref.getSource()))
+ {
+ matches.add(dbref);
+ }
+ }
+ }
+ return matches;
+ }
+
}
@Override
public boolean equals(Object o)
{
- // TODO should also override hashCode to ensure equal objects have equal
- // hashcodes
if (o == null || !(o instanceof MapList))
{
return false;
}
/**
+ * Returns a hashcode made from the fromRatio, toRatio, and from/to ranges
+ */
+ @Override
+ public int hashCode()
+ {
+ int hashCode = 31 * fromRatio;
+ hashCode = 31 * hashCode + toRatio;
+ hashCode = 31 * hashCode + fromShifts.toArray().hashCode();
+ hashCode = 31 * hashCode + toShifts.toArray().hashCode();
+ return hashCode;
+ }
+
+ /**
* Returns the 'from' ranges as {[start1, end1], [start2, end2], ...}
*
* @return
{
/*
* note lowest and highest values - bearing in mind the
- * direction may be revesed
+ * direction may be reversed
*/
fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1]));
fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1]));
*/
public void addMapList(MapList map)
{
+ if (this.equals(map))
+ {
+ return;
+ }
this.fromLowest = Math.min(fromLowest, map.fromLowest);
this.toLowest = Math.min(toLowest, map.toLowest);
this.fromHighest = Math.max(fromHighest, map.fromHighest);
}
return forwardStrand;
}
+
}
public static List<AlignedCodonFrame> findMappingsForSequence(
SequenceI sequence, List<AlignedCodonFrame> mappings)
{
+ return findMappingsForSequenceAndOthers(sequence, mappings, null);
+ }
+
+ public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
+ SequenceI sequence, List<AlignedCodonFrame> mappings,
+ AlignmentI alignment)
+ {
List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
if (sequence == null || mappings == null)
{
{
if (mapping.involvesSequence(sequence))
{
- result.add(mapping);
+ if (alignment != null)
+ {
+ for (SequenceI otherseq : alignment.getSequences())
+ {
+ if (otherseq == sequence
+ || (otherseq.getDatasetSequence() != null && (otherseq
+ .getDatasetSequence() == sequence || otherseq
+ .getDatasetSequence() == sequence
+ .getDatasetSequence())))
+ {
+ // skip sequences in subset which directly relate to sequence
+ continue;
+ }
+ if (mapping.involvesSequence(otherseq))
+ {
+ // selected a mapping contained in subselect alignment
+ result.add(mapping);
+ break;
+ }
+ }
+ }
+ else
+ {
+ result.add(mapping);
+ }
}
}
return result;
* all gapped visible regions
*/
int lastSeq = alignment.getHeight() - 1;
+ List<AlignedCodonFrame> seqMappings = null;
for (int seqNo = getStartSeq(); seqNo < lastSeq; seqNo++, seqOffset++)
{
sequence = getAlignment().getSequenceAt(seqNo);
{
continue;
}
- List<AlignedCodonFrame> seqMappings = MappingUtils
- .findMappingsForSequence(sequence, mappings);
+ seqMappings = MappingUtils
+ .findMappingsForSequenceAndOthers(sequence, mappings,
+ getCodingComplement().getAlignment());
if (!seqMappings.isEmpty())
{
break;
}
}
- if (sequence == null)
+ if (sequence == null || seqMappings == null || seqMappings.isEmpty())
{
/*
* No ungapped mapped sequence in middle column - do nothing
return 0;
}
MappingUtils.addSearchResults(sr, sequence,
- sequence.findPosition(middleColumn), mappings);
+ sequence.findPosition(middleColumn), seqMappings);
return seqOffset;
}
import jalview.util.MessageManager;
import jalview.viewmodel.seqfeatures.FeatureRendererSettings;
-import java.util.LinkedHashSet;
+import java.util.ArrayList;
import java.util.List;
-import java.util.Set;
public abstract class AWSThread extends Thread
{
/**
* dataset sequence relationships to be propagated onto new results
*/
- protected Set<AlignedCodonFrame> codonframe = null;
+ protected List<AlignedCodonFrame> codonframe = null;
/**
* are there jobs still running in this thread.
.getCodonFrames();
if (cf != null)
{
- codonframe = new LinkedHashSet<AlignedCodonFrame>();
+ codonframe = new ArrayList<AlignedCodonFrame>();
codonframe.addAll(cf);
}
}
--- /dev/null
+package jalview.ws;
+
+import jalview.ws.seqfetcher.ASequenceFetcher;
+
+public class SequenceFetcherFactory
+{
+
+ /*
+  * optional substitute fetcher (e.g. a mock set by a unit test);
+  * null means a fresh SequenceFetcher is built on each request
+  */
+ private static SequenceFetcher substitute;
+
+ /**
+ * Supplies a sequence fetcher: the substitute object if one has been set,
+ * otherwise a newly constructed SequenceFetcher.
+ *
+ * @return the substitute fetcher if set, else a new SequenceFetcher
+ */
+ public static ASequenceFetcher getSequenceFetcher()
+ {
+   if (substitute != null)
+   {
+     return substitute;
+   }
+   return new SequenceFetcher();
+ }
+
+ /**
+ * Sets the object that getSequenceFetcher should return (intended for unit
+ * testing with mock objects). Passing null restores the default behaviour
+ * of constructing a new SequenceFetcher for each request.
+ *
+ * Be sure to reset to null in the tearDown method of any tests!
+ *
+ * @param sf
+ *          the fetcher to hand out, or null to clear it
+ */
+ public static void setSequenceFetcher(SequenceFetcher sf)
+ {
+   substitute = sf;
+ }
+}
*/
package jalview.ws.jws1;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
-import ext.vamsas.MuscleWS;
import ext.vamsas.MuscleWSServiceLocator;
import ext.vamsas.MuscleWSSoapBindingStub;
import ext.vamsas.ServiceHandle;
public MsaWSClient(ext.vamsas.ServiceHandle sh, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
super();
}
private void startMsaWSClient(String altitle, AlignmentView msa,
- boolean submitGaps, boolean preserveOrder, Alignment seqdataset)
+ boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset)
{
if (!locateWebService())
{
try
{
- this.server = (MuscleWS) loc.getMuscleWS(new java.net.URL(WsURL));
+ this.server = loc.getMuscleWS(new java.net.URL(WsURL));
((MuscleWSSoapBindingStub) this.server).setTimeout(60000); // One minute
// timeout
} catch (Exception ex)
return (WebServiceName.indexOf("lustal") > -1); // cheat!
}
+ @Override
public void attachWSMenuEntry(JMenu msawsmenu,
final ServiceHandle serviceHandle, final AlignFrame alignFrame)
{
method.setToolTipText(WsURL);
method.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
AlignmentView msa = alignFrame.gatherSequencesForAlignment();
methodR.setToolTipText(WsURL);
methodR.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
AlignmentView msa = alignFrame.gatherSequencesForAlignment();
import jalview.analysis.AlignSeq;
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
*
* @return true if getAlignment will return a valid alignment result.
*/
+ @Override
public boolean hasResults()
{
if (subjobComplete && result != null && result.isFinished()
*
* @return boolean true if job can be submitted.
*/
+ @Override
public boolean hasValidInput()
{
if (seqs.getSeqs() != null)
String alTitle; // name which will be used to form new alignment window.
- Alignment dataset; // dataset to which the new alignment will be
+ AlignmentI dataset; // dataset to which the new alignment will be
// associated.
MsaWSThread(ext.vamsas.MuscleWS server, String wsUrl,
WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame,
String wsname, String title, AlignmentView _msa, boolean subgaps,
- boolean presorder, Alignment seqset)
+ boolean presorder, AlignmentI seqset)
{
this(server, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder);
OutputHeader = wsInfo.getProgressText();
}
}
+ @Override
public boolean isCancellable()
{
return true;
}
+ @Override
public void cancelJob()
{
if (!jobComplete && jobs != null)
}
}
+ @Override
public void pollJob(AWsJob job) throws Exception
{
((MsaWSJob) job).result = server.getResult(((MsaWSJob) job).getJobId());
}
+ @Override
public void StartJob(AWsJob job)
{
if (!(job instanceof MsaWSJob))
return msa;
}
+ @Override
public void parseResult()
{
int results = 0; // number of result sets received
wsInfo.showResultsNewFrame
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(true);
wsInfo.mergeResults
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(false);
while (j < l)
{
if (((AlignmentOrder) alorders.get(i))
- .equals(((AlignmentOrder) alorders.get(j))))
+ .equals((alorders.get(j))))
{
alorders.remove(j);
l--;
}
}
+ @Override
public boolean canMergeResults()
{
return false;
*/
package jalview.ws.jws1;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
-import ext.vamsas.SeqSearchI;
import ext.vamsas.SeqSearchServiceLocator;
import ext.vamsas.SeqSearchServiceSoapBindingStub;
import ext.vamsas.ServiceHandle;
public SeqSearchWSClient(ext.vamsas.ServiceHandle sh, String altitle,
jalview.datamodel.AlignmentView msa, String db,
- Alignment seqdataset, AlignFrame _alignFrame)
+ AlignmentI seqdataset, AlignFrame _alignFrame)
{
super();
alignFrame = _alignFrame;
}
private void startSeqSearchClient(String altitle, AlignmentView msa,
- String db, Alignment seqdataset)
+ String db, AlignmentI seqdataset)
{
if (!locateWebService())
{
try
{
- this.server = (SeqSearchI) loc.getSeqSearchService(new java.net.URL(
+ this.server = loc.getSeqSearchService(new java.net.URL(
WsURL));
((SeqSearchServiceSoapBindingStub) this.server).setTimeout(60000); // One
// minute
return dbs;
}
+ @Override
public void attachWSMenuEntry(JMenu wsmenu, final ServiceHandle sh,
final AlignFrame af)
{
method.setToolTipText(sh.getEndpointURL());
method.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
// use same input gatherer as for secondary structure prediction
final String searchdb = dbs[db];
method.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
AlignmentView msa = af.gatherSeqOrMsaForSecStrPrediction();
import jalview.api.FeatureColourI;
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
*
* @return null or { Alignment(+features and annotation), NewickFile)}
*/
- public Object[] getAlignment(Alignment dataset,
+ public Object[] getAlignment(AlignmentI dataset,
Map<String, FeatureColourI> featureColours)
{
String alTitle; // name which will be used to form new alignment window.
- Alignment dataset; // dataset to which the new alignment will be
+ AlignmentI dataset; // dataset to which the new alignment will be
// associated.
SeqSearchWSThread(ext.vamsas.SeqSearchI server, String wsUrl,
WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame,
String wsname, String title, AlignmentView _msa, String db,
- Alignment seqset)
+ AlignmentI seqset)
{
this(server, wsUrl, wsinfo, alFrame, _msa, wsname, db);
OutputHeader = wsInfo.getProgressText();
*/
package jalview.ws.jws2;
-import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
public MsaWSClient(Jws2Instance sh, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
this(sh, null, null, false, altitle, msa, submitGaps, preserveOrder,
public MsaWSClient(Jws2Instance sh, WsParamSetI preset, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
this(sh, preset, null, false, altitle, msa, submitGaps, preserveOrder,
public MsaWSClient(Jws2Instance sh, WsParamSetI preset,
List<Argument> arguments, boolean editParams, String altitle,
jalview.datamodel.AlignmentView msa, boolean submitGaps,
- boolean preserveOrder, Alignment seqdataset,
+ boolean preserveOrder, AlignmentI seqdataset,
AlignFrame _alignFrame)
{
super(_alignFrame, preset, arguments);
}
private void startMsaWSClient(String altitle, AlignmentView msa,
- boolean submitGaps, boolean preserveOrder, Alignment seqdataset)
+ boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset)
{
// if (!locateWebService())
// {
*
* @return true if getAlignment will return a valid alignment result.
*/
+ @Override
public boolean hasResults()
{
if (subjobComplete
*
* @return boolean true if job can be submitted.
*/
+ @Override
public boolean hasValidInput()
{
// TODO: get attributes for this MsaWS instance to check if it can do two
String alTitle; // name which will be used to form new alignment window.
- Alignment dataset; // dataset to which the new alignment will be
+ AlignmentI dataset; // dataset to which the new alignment will be
// associated.
String wsUrl, WebserviceInfo wsinfo,
jalview.gui.AlignFrame alFrame, String wsname, String title,
AlignmentView _msa, boolean subgaps, boolean presorder,
- Alignment seqset)
+ AlignmentI seqset)
{
this(server2, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder);
OutputHeader = wsInfo.getProgressText();
return validInput;
}
+ @Override
public boolean isCancellable()
{
return true;
}
+ @Override
public void cancelJob()
{
if (!jobComplete && jobs != null)
}
}
+ @Override
public void pollJob(AWsJob job) throws Exception
{
// TODO: investigate if we still need to cast here in J1.6
return changed;
}
+ @Override
public void StartJob(AWsJob job)
{
Exception lex = null;
}
}
+ @Override
public void parseResult()
{
long progbar = System.currentTimeMillis();
wsInfo.showResultsNewFrame
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(true);
wsInfo.mergeResults
.addActionListener(new java.awt.event.ActionListener()
{
+ @Override
public void actionPerformed(java.awt.event.ActionEvent evt)
{
displayResults(false);
// becomes null if the alignment window was closed before the alignment
// job finished.
AlignmentI copyComplement = new Alignment(complement);
+ // todo should this be done by copy constructor?
+ copyComplement.setGapCharacter(complement.getGapCharacter());
+ // share the same dataset (and the mappings it holds)
+ copyComplement.setDataset(complement.getDataset());
copyComplement.alignAs(al);
if (copyComplement.getHeight() > 0)
{
}
}
+ @Override
public boolean canMergeResults()
{
return false;
/**
* Constructor
*/
- public ASequenceFetcher()
+ protected ASequenceFetcher()
{
super();
* if true, only fetch from nucleotide data sources, else peptide
* @return
*/
- public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna)
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
{
Vector<SequenceI> rseqs = new Vector<SequenceI>();
Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
- for (int r = 0; r < refs.length; r++)
+ for (DBRefEntry ref : refs)
{
- if (!queries.containsKey(refs[r].getSource()))
+ if (!queries.containsKey(ref.getSource()))
{
- queries.put(refs[r].getSource(), new ArrayList<String>());
+ queries.put(ref.getSource(), new ArrayList<String>());
}
- List<String> qset = queries.get(refs[r].getSource());
- if (!qset.contains(refs[r].getAccessionId()))
+ List<String> qset = queries.get(ref.getSource());
+ if (!qset.contains(ref.getAccessionId()))
{
- qset.add(refs[r].getAccessionId());
+ qset.add(ref.getAccessionId());
}
}
Enumeration<String> e = queries.keys();
for (int is = 0; is < seqs.length; is++)
{
rseqs.addElement(seqs[is]);
- DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is]
+ List<DBRefEntry> frefs = DBRefUtils.searchRefs(seqs[is]
.getDBRefs(), new DBRefEntry(db, null, null));
- if (frefs != null)
+ for (DBRefEntry dbr : frefs)
{
- for (DBRefEntry dbr : frefs)
- {
- queriesFound.add(dbr.getAccessionId());
- queriesMade.remove(dbr.getAccessionId());
- }
+ queriesFound.add(dbr.getAccessionId());
+ queriesMade.remove(dbr.getAccessionId());
}
seqs[is] = null;
}
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@Test(groups = { "Functional" })
public void testMakeCdsAlignment()
{
+ /*
+ * scenario:
+ * dna1 --> [4, 6] [10,12] --> pep1
+ * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
+ */
SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
SequenceI pep1 = new Sequence("pep1", "GF");
SequenceI pep2 = new Sequence("pep2", "GFP");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
+ pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
dna1.createDatasetSequence();
dna2.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
dna.setDataset(null);
- List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
+ /*
+ * need a sourceDbRef if we are to construct dbrefs to the CDS
+ * sequence
+ */
+ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
+ dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
+ dna2.getDatasetSequence().setSourceDBRef(dbref);
+
+ /*
+ * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
+ * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
+ */
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
/*
* execute method under test:
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, mappings, dna);
+ dna1, dna2 }, dna.getDataset(), null);
+ /*
+ * verify cds sequences
+ */
assertEquals(2, cds.getSequences().size());
- assertEquals("GGGTTT", cds.getSequenceAt(0)
- .getSequenceAsString());
- assertEquals("GGGTTTCCC", cds.getSequenceAt(1)
- .getSequenceAsString());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
/*
* verify shared, extended alignment dataset
*/
assertSame(dna.getDataset(), cds.getDataset());
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(0).getDatasetSequence()));
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(1).getDatasetSequence()));
+ SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
+ SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
+ assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
+ assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
+
+ /*
+ * verify CDS has a dbref with mapping to peptide
+ */
+ assertNotNull(cds1Dss.getDBRefs());
+ assertEquals(1, cds1Dss.getDBRefs().length);
+ dbref = cds1Dss.getDBRefs()[0];
+ assertEquals("UNIPROT", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("pep1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
+ MapList cdsMapping = new MapList(new int[] { 1, 6 },
+ new int[] { 1, 2 }, 3, 1);
+ assertEquals(cdsMapping, dbref.getMap().getMap());
+
+ /*
+ * verify peptide has added a dbref with reverse mapping to CDS
+ */
+ assertNotNull(pep1.getDBRefs());
+ assertEquals(2, pep1.getDBRefs().length);
+ dbref = pep1.getDBRefs()[1];
+ assertEquals("ENSEMBL", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("CDS|dna1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(cds1Dss, dbref.getMap().getTo());
+ assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
/*
- * Verify mappings from CDS to peptide and cDNA to CDS
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
* the mappings are on the shared alignment dataset
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
*/
- assertSame(dna.getCodonFrames(), cds.getCodonFrames());
- List<AlignedCodonFrame> cdsMappings = cds.getCodonFrames();
- assertEquals(2, cdsMappings.size());
-
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ assertEquals(6, cdsMappings.size());
+
/*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep1 (dna to pep1, cds to pep1)
* Mapping from pep1 to GGGTTT in first new exon sequence
*/
- List<AlignedCodonFrame> pep1Mapping = MappingUtils
+ List<AlignedCodonFrame> pep1Mappings = MappingUtils
.findMappingsForSequence(pep1, cdsMappings);
- assertEquals(1, pep1Mapping.size());
+ assertEquals(2, pep1Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
+ assertEquals(1, mappings.size());
+
// map G to GGG
- SearchResults sr = MappingUtils
- .buildSearchResults(pep1, 1, cdsMappings);
+ SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
- sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
/*
- * Mapping from pep2 to GGGTTTCCC in second new exon sequence
+ * Two mappings involve pep2 (dna to pep2, cds to pep2)
+ * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
*/
- List<AlignedCodonFrame> pep2Mapping = MappingUtils
+ List<AlignedCodonFrame> pep2Mappings = MappingUtils
.findMappingsForSequence(pep2, cdsMappings);
- assertEquals(1, pep2Mapping.size());
+ assertEquals(2, pep2Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep2Mappings);
+ assertEquals(1, mappings.size());
// map G to GGG
- sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
- sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
// map P to CCC
- sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
}
pep1.createDatasetSequence();
pep2.createDatasetSequence();
pep3.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
- null));
pep1.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "2", "A12345"));
pep2.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "4", "A12347"));
/*
+ * Create the CDS alignment
+ */
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
+ dna.setDataset(null);
+
+ /*
* Make the mappings from dna to protein
*/
- List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
// map ...GGG...TTT to GF
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
// map aaa...ccc to KP
map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
// map aaa......TTT to KF
map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
- mappings.add(acf);
-
- /*
- * Create the CDS alignment; also augments the dna-to-protein mappings with
- * exon-to-protein and exon-to-dna mappings
- */
- AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
- dna.setDataset(null);
+ dna.addCodonFrame(acf);
/*
* execute method under test
*/
AlignmentI cdsal = AlignmentUtils.makeCdsAlignment(
- new SequenceI[] { dna1 }, mappings, dna);
+ new SequenceI[] { dna1 }, dna.getDataset(), null);
/*
* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
SequenceI cdsSeq = cds.get(0);
assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12345", cdsSeq.getName());
- assertEquals("dna1|pep1", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(1);
assertEquals("aaaccc", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12346", cdsSeq.getName());
- assertEquals("dna1|pep2", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(2);
assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12347", cdsSeq.getName());
- assertEquals("dna1|pep3", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
* Verify there are mappings from each cds sequence to its protein product
* and also to its dna source
*/
- Iterator<AlignedCodonFrame> newMappingsIterator = cdsal
- .getCodonFrames().iterator();
+ List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
- // mappings for dna1 - exon1 - pep1
- AlignedCodonFrame cdsMapping = newMappingsIterator.next();
- List<Mapping> dnaMappings = cdsMapping.getMappingsFromSequence(dna1);
- assertEquals(3, dnaMappings.size());
- assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0)
- .getTo());
- assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
- .get(0).getMap().getToPosition(1));
- List<Mapping> peptideMappings = cdsMapping.getMappingsFromSequence(cds
- .get(0).getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
-
- // mappings for dna1 - cds2 - pep2
- assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1)
- .getTo());
- assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
- .get(1).getMap().getToPosition(4));
- peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(1)
- .getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
-
- // mappings for dna1 - cds3 - pep3
- assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2)
+ /*
+ * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
+ */
+ List<AlignedCodonFrame> dnaMappings = MappingUtils
+ .findMappingsForSequence(dna1, newMappings);
+ assertEquals(6, dnaMappings.size());
+
+ /*
+ * dna1 to pep1
+ */
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(pep1, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep1.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds1
+ */
+ List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(0), dnaMappings);
+ Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
+ .getMapping();
+ assertSame(cds.get(0).getDatasetSequence(), mapping
.getTo());
- assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
- .get(2).getMap().getToPosition(4));
- peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2)
- .getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
+ assertEquals("G(1) in CDS should map to G(4) in DNA", 4, mapping
+ .getMap().getToPosition(1));
+
+ /*
+ * dna1 to pep2
+ */
+ mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep2.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds2
+ */
+ List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(1), dnaMappings);
+ mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
+ assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
+ assertEquals("c(4) in CDS should map to c(7) in DNA", 7, mapping
+ .getMap().getToPosition(4));
+
+ /*
+ * dna1 to pep3
+ */
+ mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep3.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds3
+ */
+ List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(2), dnaMappings);
+ mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
+ assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
+ assertEquals("T(4) in CDS should map to T(10) in DNA", 10, mapping
+ .getMap().getToPosition(4));
}
@Test(groups = { "Functional" })
dna3.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
- null));
+
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
+ dna.setDataset(null);
- List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
MapList map = new MapList(new int[] { 4, 12, 16, 18 },
new int[] { 1, 4 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
new int[] { 1, 3 },
3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
- mappings.add(acf);
+ dna.addCodonFrame(acf);
- AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
- dna.setDataset(null);
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2, dna3 }, mappings, dna);
+ dna1, dna2, dna3 }, dna.getDataset(), null);
List<SequenceI> cdsSeqs = cds.getSequences();
assertEquals(2, cdsSeqs.size());
assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
.contains(cdsSeqs.get(1).getDatasetSequence()));
/*
- * Verify updated mappings
+ * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
+ * and the same for dna2/cds2/pep2
*/
- List<AlignedCodonFrame> cdsMappings = cds.getCodonFrames();
- assertEquals(2, cdsMappings.size());
+ List<AlignedCodonFrame> mappings = cds.getCodonFrames();
+ assertEquals(6, mappings.size());
/*
- * Mapping from pep1 to GGGTTT in first new CDS sequence
+ * 2 mappings involve pep1
*/
- List<AlignedCodonFrame> pep1Mapping = MappingUtils
- .findMappingsForSequence(pep1, cdsMappings);
- assertEquals(1, pep1Mapping.size());
+ List<AlignedCodonFrame> pep1Mappings = MappingUtils
+ .findMappingsForSequence(pep1, mappings);
+ assertEquals(2, pep1Mappings.size());
+
/*
+ * Get mapping of pep1 to cds1 and verify it
* maps GPFG to 1-3,4-6,7-9,10-12
*/
- SearchResults sr = MappingUtils
- .buildSearchResults(pep1, 1, cdsMappings);
+ List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
+ assertEquals(1, pep1CdsMappings.size());
+ SearchResults sr = MappingUtils.buildSearchResults(pep1, 1,
+ pep1CdsMappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(10, m.getStart());
assertEquals(12, m.getEnd());
/*
- * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
+ * Get mapping of pep2 to cds2 and verify it
+ * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
*/
- List<AlignedCodonFrame> pep2Mapping = MappingUtils
- .findMappingsForSequence(pep2, cdsMappings);
- assertEquals(1, pep2Mapping.size());
- sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings);
+ List<AlignedCodonFrame> pep2Mappings = MappingUtils
+ .findMappingsForSequence(pep2, mappings);
+ assertEquals(2, pep2Mappings.size());
+ List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
+ assertEquals(1, pep2CdsMappings.size());
+ sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
m = sr.getResults().get(0);
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
m = sr.getResults().get(0);
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
assertEquals('T', map.get(11).get(seq1).charValue());
assertEquals('T', map.get(12).get(seq1).charValue());
}
+
+ /**
+ * Test for the case where the products for which we want CDS are specified.
+ * This is to represent the case where EMBL has CDS mappings to both Uniprot
+ * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
+ * the protein sequences specified.
+ * <p>
+ * Fixture: dna1 is mapped (positions 4-6, 10-12) to both Uniprot|pep1 and
+ * EMBL|pep3; dna2 is mapped (positions 1-3, 7-9, 13-15) to both Uniprot|pep2
+ * and EMBL|pep4. Only the EMBL peptides (pep3, pep4) are passed as the
+ * requested products.
+ * <p>
+ * Expected: two CDS sequences (GGGTTT and GGGTTTCCC) whose dataset sequences
+ * are contained in the dna alignment's dataset, and six mappings held on that
+ * shared dataset, with pep3 and pep4 each mapped codon by codon to their CDS.
+ */
+ @Test(groups = { "Functional" })
+ public void testMakeCdsAlignment_filterProducts()
+ {
+ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+ SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
+ SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
+ SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
+ SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
+ dna1.createDatasetSequence();
+ dna2.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+ pep3.createDatasetSequence();
+ pep4.createDatasetSequence();
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
+ dna.setDataset(null);
+ AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
+ emblPeptides.setDataset(null);
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ new int[] { 1, 2 }, 3, 1);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ acf = new AlignedCodonFrame();
+ map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ 3, 1);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ /*
+ * execute method under test to find CDS for EMBL peptides only
+ * (pep3 and pep4 are the only products passed in)
+ */
+ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+ dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray());
+
+ assertEquals(2, cds.getSequences().size());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * verify shared, extended alignment dataset: the CDS alignment uses the
+ * same dataset object as the dna alignment, and that dataset contains the
+ * dataset sequences of both new CDS sequences
+ */
+ assertSame(dna.getDataset(), cds.getDataset());
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(0).getDatasetSequence()));
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(1).getDatasetSequence()));
+
+ /*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide;
+ * the mappings are on the shared alignment dataset
+ */
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ /*
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
+ */
+ assertEquals(6, cdsMappings.size());
+
+ /*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep3 (dna to pep3, cds to pep3).
+ * Narrow these to the single mapping that also involves the first new CDS
+ * sequence, then verify pep3 maps to GGGTTT in that CDS sequence
+ */
+ List<AlignedCodonFrame> pep3Mappings = MappingUtils
+ .findMappingsForSequence(pep3, cdsMappings);
+ assertEquals(2, pep3Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
+ assertEquals(1, mappings.size());
+
+ // map G (pep3 position 1) to GGG (CDS positions 1-3)
+ SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ Match m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F (pep3 position 2) to TTT (CDS positions 4-6)
+ sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+
+ /*
+ * Two mappings involve pep4 (dna to pep4, cds to pep4).
+ * Narrow these to the single mapping that also involves the second new CDS
+ * sequence, then verify pep4 maps to GGGTTTCCC in that CDS sequence
+ */
+ List<AlignedCodonFrame> pep4Mappings = MappingUtils
+ .findMappingsForSequence(pep4, cdsMappings);
+ assertEquals(2, pep4Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep4Mappings);
+ assertEquals(1, mappings.size());
+ // map G (pep4 position 1) to GGG (CDS positions 1-3)
+ sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F (pep4 position 2) to TTT (CDS positions 4-6)
+ sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+ // map P (pep4 position 3) to CCC (CDS positions 7-9)
+ sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(7, m.getStart());
+ assertEquals(9, m.getEnd());
+ }
+
+ /**
+ * Test the method that just copies aligned sequences, provided all sequences
+ * to be aligned share the aligned sequence's dataset
+ * <p>
+ * al1 holds the ungapped sequences to be aligned (dna1, dna2); al2 holds
+ * gapped copies (dna3, dna4) which are asserted to share dataset sequences
+ * with dna1 and dna2. The method is called as
+ * alignAsSameSequences(unaligned, aligned), and three cases are checked:
+ * the basic case, an extra sequence in the aligned alignment (still
+ * succeeds), and an extra sequence in the unaligned alignment (fails).
+ */
+ @Test(groups = "Functional")
+ public void testAlignAsSameSequences()
+ {
+ SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
+ AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
+ ((Alignment) al1).createDatasetAlignment();
+
+ SequenceI dna3 = new Sequence(dna1);
+ SequenceI dna4 = new Sequence(dna2);
+ assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
+ assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
+ String seq1 = "-cc-GG-GT-TT--aaa";
+ dna3.setSequence(seq1);
+ String seq2 = "C--C-Cgg--gtt-tAA-A-";
+ dna4.setSequence(seq2);
+ AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
+ ((Alignment) al2).createDatasetAlignment();
+
+ assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
+ assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
+ assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * add another sequence to 'aligned' - should still succeed, since
+ * unaligned sequences still share a dataset with aligned sequences
+ */
+ SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
+ dna5.createDatasetSequence();
+ al2.addSequence(dna5);
+ assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
+ assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
+ assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * add another sequence to 'unaligned' - should fail, since now not
+ * all unaligned sequences share a dataset with aligned sequences
+ */
+ SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
+ dna6.createDatasetSequence();
+ al1.addSequence(dna6);
+ // JAL-2110 JBP Comment: what's the use case for this behaviour ?
+ assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
+ }
+ /**
+ * Test alignAsSameSequences where the aligned alignment holds more than one
+ * sequence derived from the same source sequence (a full-length copy of dna1
+ * and a subsequence of dna1) as well as a copy of dna2. Gaps are inserted
+ * into the aligned copies, and the test verifies that each corresponding
+ * sequence in the alignment to be aligned ends up with the same gapped
+ * sequence string.
+ */
+ @Test(groups = "Functional")
+ public void testAlignAsSameSequencesMultipleSubSeq()
+ {
+ SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
+ SequenceI as1 = dna1.deriveSequence(), as2 = dna1.deriveSequence()
+ .getSubSequence(3, 7), as3 = dna2.deriveSequence();
+ as1.insertCharAt(6, 5, '-'); // presumably inserts 5 gaps at column 6 - TODO confirm argument order
+ String s_as1 = as1.getSequenceAsString();
+ as2.insertCharAt(6, 5, '-');
+ String s_as2 = as2.getSequenceAsString();
+ as3.insertCharAt(6, 5, '-');
+ String s_as3 = as3.getSequenceAsString();
+ AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
+
+ // why do we need to cast this still ? (NOTE(review): presumably createDatasetAlignment() is not declared on AlignmentI - confirm)
+ ((Alignment) aligned).createDatasetAlignment();
+ SequenceI uas1 = dna1.deriveSequence(), uas2 = dna1.deriveSequence()
+ .getSubSequence(3, 7), uas3 = dna2.deriveSequence();
+ AlignmentI tobealigned = new Alignment(new SequenceI[] { uas1, uas2,
+ uas3 });
+ ((Alignment) tobealigned).createDatasetAlignment();
+
+ assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
+ assertEquals(s_as1, uas1.getSequenceAsString());
+ assertEquals(s_as2, uas2.getSequenceAsString());
+ assertEquals(s_as3, uas3.getSequenceAsString());
+ }
+
}
package jalview.analysis;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNotSame;
+import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.ws.SequenceFetcher;
+import jalview.ws.SequenceFetcherFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;
public class CrossRefTest
DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
+ // ENSEMBL is a source of either dna or protein sequence data
+ DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
- ref6, ref7, ref8 };
+ ref6, ref7, ref8, ref9 };
/*
* Just the DNA refs:
*/
- DBRefEntry[] found = CrossRef.findXDbRefs(false, refs);
- assertEquals(3, found.length);
+ DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
+ assertEquals(4, found.length);
assertSame(ref5, found[0]);
assertSame(ref6, found[1]);
assertSame(ref7, found[2]);
+ assertSame(ref9, found[3]);
/*
* Just the protein refs:
*/
- found = CrossRef.findXDbRefs(true, refs);
- assertEquals(4, found.length);
+ found = DBRefUtils.selectDbRefs(false, refs);
+ assertEquals(5, found.length);
assertSame(ref1, found[0]);
assertSame(ref2, found[1]);
assertSame(ref3, found[2]);
assertSame(ref4, found[3]);
+ assertSame(ref9, found[4]);
+ }
+
+ /**
+ * Test the method that finds a sequence's "product" xref source databases,
+ * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
+ * sequences which share a dbref with the sequence)
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testFindXrefSourcesForSequence_proteinToDna()
+ {
+ SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
+ AlignmentI al = new Alignment(new SequenceI[] {});
+
+ /*
+ * first with no dbrefs to search
+ */
+ List<String> sources = new CrossRef(new SequenceI[] { seq }, al)
+ .findXrefSourcesForSequences(false);
+ assertTrue(sources.isEmpty());
+
+ /*
+ * add some dbrefs to sequence
+ */
+ // protein db is not a candidate for findXrefSources
+ seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ // dna coding databases are
+ seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+ // a second EMBL xref should not result in a duplicate
+ seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
+ seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+ seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+ seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
+ // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
+ // (it is expected to be absent from the sources found below)
+ seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
+ sources = new CrossRef(new SequenceI[] { seq }, al)
+ .findXrefSourcesForSequences(false);
+ assertEquals(4, sources.size());
+ assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString());
+
+ /*
+ * add a sequence to the alignment which has a dbref to UNIPROT|A1234
+ * and others to dna coding databases
+ */
+ seq.setDBRefs(null);
+ seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
+ SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
+ seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
+ seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
+ seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
+ al.addSequence(seq2);
+ // the result list is replaced, so there is no need to clear the old one
+ sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
+ .findXrefSourcesForSequences(false);
+ assertEquals(3, sources.size());
+ assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
}
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the nucleotide sequence but on a peptide sequence in
+ * the alignment which which it shares a nucleotide dbref
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testFindXrefSequences_indirectDbrefToProtein()
+ {
+ /*
+ * Alignment setup:
+ * - nucleotide dbref EMBL|AF039662
+ * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * Find UNIPROT xrefs for nucleotide
+ * - it has no UNIPROT dbref of its own
+ * - but peptide with matching nucleotide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(uniprotSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where only an indirect
+ * xref is found - not on the peptide sequence but on a nucleotide sequence in
+ * the alignment which which it shares a protein dbref
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testFindXrefSequences_indirectDbrefToNucleotide()
+ {
+ /*
+ * Alignment setup:
+ * - peptide dbref UNIPROT|Q9ZTS2
+ * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
+ */
+ SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
+ uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+
+ /*
+ * find EMBL xrefs for peptide sequence - it has no direct
+ * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
+ */
+ /*
+ * Find EMBL xrefs for peptide
+ * - it has no EMBL dbref of its own
+ * - but nucleotide with matching peptide dbref does, so is returned
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
+ Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al)
+ .findXrefSequences("EMBL", false);
+ assertEquals(1, xrefs.getHeight());
+ assertSame(emblSeq, xrefs.getSequenceAt(0));
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has no dbref to the desired source, and there are no indirect
+ * references via another sequence in the alignment
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_noDbrefs()
+ {
+ /*
+ * two nucleotide sequences, one with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
+
+ /*
+ * find UNIPROT xrefs for peptide sequence - it has no direct
+ * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
+ * equatable to it, so no results found
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
+ Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertNull(xrefs);
+ }
+
+ /**
+ * Tests for the method that searches an alignment (with one sequence
+ * excluded) for protein/nucleotide sequences with a given cross-reference
+ */
+ @Test(groups = { "Functional" }, enabled = true)
+ public void testSearchDataset()
+ {
+ /*
+ * nucleotide sequence with UNIPROT AND EMBL dbref
+ * (the UNIPROT dbref carries a 3:1 mapping to a peptide 'pep2')
+ * peptide sequence with UNIPROT dbref
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList(
+ new int[] { 1, 21 }, new int[] {
+ 1, 7 }, 3, 1));
+ DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+ dna1.addDBRef(dbref);
+ dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
+
+ List<SequenceI> result = new ArrayList<SequenceI>();
+
+ /*
+ * first search for a dbref nowhere on the alignment:
+ */
+ dbref = new DBRefEntry("UNIPROT", "0", "P30419");
+ CrossRef testee = new CrossRef(al.getSequencesArray(), al);
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
+ true);
+ assertFalse(found);
+ assertTrue(result.isEmpty());
+ assertTrue(acf.isEmpty());
+
+ /*
+ * search for a protein sequence with dbref UNIPROT:Q9ZTS2
+ */
+ acf = new AlignedCodonFrame();
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+ found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
+ acf, false); // search dataset with a protein xref from a dna
+ // sequence to locate the protein product
+ assertTrue(found);
+ assertEquals(1, result.size());
+ assertSame(pep1, result.get(0));
+ assertTrue(acf.isEmpty());
+
+ /*
+ * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
+ */
+ result.clear();
+ acf = new AlignedCodonFrame();
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
+ found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
+ acf, false); // search dataset with a protein's direct dbref to
+ // locate dna sequences with matching xref
+ assertTrue(found);
+ assertEquals(1, result.size());
+ assertSame(dna1, result.get(0));
+ // should now have a mapping from dna to pep1
+ List<SequenceToSequenceMapping> mappings = acf.getMappings();
+ assertEquals(1, mappings.size());
+ SequenceToSequenceMapping mapping = mappings.get(0);
+ assertSame(dna1, mapping.getFromSeq());
+ assertSame(pep1, mapping.getMapping().getTo());
+ // the map should have the ranges and ratios of the dna dbref's map
+ MapList mapList = mapping.getMapping().getMap();
+ assertEquals(1, mapList.getToRatio());
+ assertEquals(3, mapList.getFromRatio());
+ assertEquals(1, mapList.getFromRanges().size());
+ assertEquals(1, mapList.getFromRanges().get(0)[0]);
+ assertEquals(21, mapList.getFromRanges().get(0)[1]);
+ assertEquals(1, mapList.getToRanges().size());
+ assertEquals(1, mapList.getToRanges().get(0)[0]);
+ assertEquals(7, mapList.getToRanges().get(0)[1]);
+ }
+
+ /**
+ * Test for finding 'product' sequences for the case where the selected
+ * sequence has a dbref with a mapping to a sequence. This represents the case
+ * where either
+ * <ul>
+ * <li>a fetched sequence is already decorated with its cross-reference (e.g.
+ * EMBL + translation), or</li>
+ * <li>Get Cross-References has been done once resulting in instantiated
+ * cross-reference mappings</li>
+ * </ul>
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_fromDbRefMap()
+ {
+ /*
+ * scenario: nucleotide sequence AF039662
+ * with dbref + mapping to Q9ZTS2 and P30419
+ * which themselves each have a dbref and feature
+ */
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
+ SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+ dna1.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+
+ pep1.getDatasetSequence().addDBRef(
+ new DBRefEntry("Pfam", "0", "PF00111"));
+ pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
+ "group"));
+ pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+ pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
+ 12f, "group2"));
+
+ MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
+ 3, 1);
+ Mapping map = new Mapping(pep1, mapList);
+ DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+ dna1.getDatasetSequence().addDBRef(dbRef1);
+ mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
+ map = new Mapping(pep2, mapList);
+ DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
+ dna1.getDatasetSequence().addDBRef(dbRef2);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence - it should pick up
+ * mapped sequences
+ */
+ AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+ Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
+ .findXrefSequences("UNIPROT", true);
+ assertEquals(2, xrefs.getHeight());
+
+ /*
+ * cross-refs alignment holds copies of the mapped sequences, sharing
+ * the same dataset sequence as the originals
+ * (the helper takes the copy first, then the original)
+ * TODO dbrefs and features of the copies are not yet verified here
+ */
+ checkCopySequence(xrefs.getSequenceAt(0), pep1);
+ checkCopySequence(xrefs.getSequenceAt(1), pep2);
+ }
+
+ /**
+ * Helper method that verifies that 'copy' has the same name, start, end,
+ * sequence and dataset sequence object as 'original' (but is not the same
+ * object). Values taken from 'original' are passed as the expected argument
+ * of each assertion, so that a failure message reports the copy's value as
+ * the actual one.
+ *
+ * @param copy
+ * the sequence under test
+ * @param original
+ * the sequence the copy should match
+ */
+ private void checkCopySequence(SequenceI copy, SequenceI original)
+ {
+ assertNotSame(original, copy);
+ assertSame(original.getDatasetSequence(), copy.getDatasetSequence());
+ assertEquals(original.getName(), copy.getName());
+ assertEquals(original.getStart(), copy.getStart());
+ assertEquals(original.getEnd(), copy.getEnd());
+ assertEquals(original.getSequenceAsString(), copy.getSequenceAsString());
+ }
+
+ /**
+ * Test for finding 'product' sequences when the selected sequence has
+ * dbrefs without mappings, so that the sequences have to be fetched from
+ * the database (a stubbed fetcher stands in for the real one)
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_withFetch()
+ {
+ /*
+ * the two peptides that the stubbed fetcher will 'retrieve'
+ */
+ final SequenceI fetchedPep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+ final SequenceI fetchedPep2 = new Sequence("P00314", "MRKLLAASG");
+
+ /*
+ * nucleotide sequence with three unmapped UNIPROT dbrefs
+ */
+ SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
+ nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
+ nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+
+ /*
+ * argument false suppresses adding DAS sources
+ * todo: define an interface type SequenceFetcherI and mock that
+ */
+ SequenceFetcher stubFetcher = new SequenceFetcher(false)
+ {
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ // always answers with the same two peptides, whatever is requested
+ return new SequenceI[] { fetchedPep1, fetchedPep2 };
+ }
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(stubFetcher);
+
+ /*
+ * find UNIPROT xrefs for nucleotide sequence; expect exactly the
+ * fetched peptide objects, in the order returned by the fetcher
+ */
+ SequenceI[] selection = new SequenceI[] { nucleotide };
+ AlignmentI alignment = new Alignment(new SequenceI[] { nucleotide });
+ CrossRef finder = new CrossRef(selection, alignment);
+ Alignment products = finder.findXrefSequences("UNIPROT", true);
+ assertEquals(2, products.getHeight());
+ assertSame(fetchedPep1, products.getSequenceAt(0));
+ assertSame(fetchedPep2, products.getSequenceAt(1));
+ }
+
+ /**
+ * Clears any mock sequence fetcher that tests in this class set on
+ * SequenceFetcherFactory, so that it does not leak into other test classes.
+ * alwaysRun is needed for this to execute when tests are selected by group,
+ * since this method does not itself belong to a group.
+ */
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ SequenceFetcherFactory.setSequenceFetcher(null);
+ }
+
+ /**
+ * Test for finding 'product' sequences when a gene sequence and its
+ * transcript sequences all carry dbrefs to Uniprot.
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_forGeneAndTranscripts()
+ {
+ /*
+ * 'gene' sequence, with dbrefs to both proteins
+ */
+ SequenceI geneSeq = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
+ geneSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
+ geneSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
+
+ /*
+ * 'transcript' with CDS feature (supports mapping to protein)
+ */
+ SequenceI transcript = new Sequence("ENST00000288602",
+ "taagATGGCGGCGCTGa");
+ transcript.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
+ SequenceFeature transcriptCds = new SequenceFeature("CDS", "", 5, 16,
+ 0f, null);
+ transcript.addSequenceFeature(transcriptCds);
+
+ /*
+ * 'spliced transcript' with CDS ranges
+ */
+ SequenceI splicedTranscript = new Sequence("ENST00000497784",
+ "gCAGGCtaTCTGTTCaa");
+ splicedTranscript.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
+ SequenceFeature firstExonCds = new SequenceFeature("CDS", "", 2, 6, 0f,
+ null);
+ SequenceFeature secondExonCds = new SequenceFeature("CDS", "", 9, 15,
+ 0f, null);
+ splicedTranscript.addSequenceFeature(firstExonCds);
+ splicedTranscript.addSequenceFeature(secondExonCds);
+
+ /*
+ * TODO code is fragile - use of SequenceIdMatcher depends on fetched
+ * sequences having a name starting Source|Accession
+ * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
+ */
+ final SequenceI fetchedPep1 = new Sequence("UNIPROT|P15056", "MAAL");
+ final SequenceI fetchedPep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
+
+ /*
+ * argument false suppresses adding DAS sources
+ * todo: define an interface type SequenceFetcherI and mock that
+ */
+ SequenceFetcher stubFetcher = new SequenceFetcher(false)
+ {
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ // always answers with the same two peptides, whatever is requested
+ return new SequenceI[] { fetchedPep1, fetchedPep2 };
+ }
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(stubFetcher);
+
+ /*
+ * find UNIPROT xrefs for gene and transcripts
+ * intended outcome:
+ * - the two proteins are retrieved but not duplicated
+ * - mappings are built from transcript (CDS) to proteins
+ * - no mappings from gene to proteins
+ * (only the retrieved sequences are asserted on below)
+ */
+ SequenceI[] selection = new SequenceI[] { geneSeq, transcript,
+ splicedTranscript };
+ AlignmentI alignment = new Alignment(selection);
+ CrossRef finder = new CrossRef(selection, alignment);
+ Alignment products = finder.findXrefSequences("UNIPROT", true);
+ assertEquals(2, products.getHeight());
+ assertSame(fetchedPep1, products.getSequenceAt(0));
+ assertSame(fetchedPep2, products.getSequenceAt(1));
+ }
+
+ /**
+ * <pre>
+ * Test that emulates this (real but simplified) case:
+ * Alignment: DBrefs
+ * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
+ * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
+ * Find cross-references for EMBL. These are mocked here as
+ * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
+ * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
+ * EMBL|M19487 with mappings to P0CE19, Q46432
+ * EMBL|X07547 with mappings to P0CE20, B0BCM4
+ * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
+ * The 3 EMBL sequences are added to the alignment dataset.
+ * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
+ * alignment dataset and updated to reference the original Uniprot sequences.
+ * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
+ * resolved from the dataset, and only the X07547 dbref fetched.
+ * So the end state to verify is:
+ * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
+ * - P0CE19/20 dbrefs to EMBL sequences now have mappings
+ * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+ * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+ * </pre>
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_uniprotEmblManyToMany()
+ {
+ /*
+ * Uniprot sequences, both with xrefs to EMBL|J03321 and EMBL|X06707;
+ * P0CE19 also has EMBL|M19487, and P0CE20 also has EMBL|X07547
+ */
+ SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
+ SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
+
+ /*
+ * EMBL sequences to be 'fetched', complete with dbrefs and mappings
+ * to their protein products (CDS location and translations are provided
+ * in EMBL XML); these should be matched to, and replaced with,
+ * the corresponding uniprot sequences after fetching
+ */
+
+ /*
+ * J03321 with mappings to P0CE19 and P0CE20
+ */
+ final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
+ DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
+ 3, 1);
+ Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ mapList);
+ // add a dbref to the mapped to sequence - should get copied to p0ce19
+ map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
+ dbref1.setMap(map);
+ j03321.addDBRef(dbref1);
+ DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
+ dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
+ new MapList(mapList)));
+ j03321.addDBRef(dbref2);
+
+ /*
+ * X06707 with mappings to P0CE19 and P0CE20
+ */
+ final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
+ DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
+ 1);
+ dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
+ x06707.addDBRef(dbref3);
+ DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
+ 1);
+ dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
+ x06707.addDBRef(dbref4);
+
+ /*
+ * M19487 with mapping to P0CE19 and Q46432
+ */
+ final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
+ DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ new MapList(mapList)));
+ m19487.addDBRef(dbref5);
+ DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
+ dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
+ new MapList(mapList)));
+ m19487.addDBRef(dbref6);
+
+ /*
+ * X07547 with mapping to P0CE20 and B0BCM4
+ * (the P0CE20 dbref maps to a P0CE20 product, consistent with the
+ * P0CE20 mappings set up on J03321 and X06707 above)
+ */
+ final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
+ DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
+ new MapList(map2)));
+ x07547.addDBRef(dbref7);
+ DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
+ dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
+ new MapList(map2)));
+ x07547.addDBRef(dbref8);
+
+ /*
+ * mock sequence fetcher to 'return' the EMBL sequences
+ * TODO: Mockito would allow .thenReturn().thenReturn() here,
+ * and also capture and verification of the parameters
+ * passed in calls to getSequences() - important to verify that
+ * duplicate sequence fetches are not requested
+ */
+ SequenceFetcher mockFetcher = new SequenceFetcher(false)
+ {
+ // counts calls to getSequences, to vary the response per call
+ int call = 0;
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
+ {
+ call++;
+ if (call == 1)
+ {
+ assertEquals("Expected 3 embl seqs in first fetch", 3,
+ refs.size());
+ return new SequenceI[] { j03321, x06707, m19487 };
+ }
+ else
+ {
+ assertEquals("Expected 1 embl seq in second fetch", 1,
+ refs.size());
+ return new SequenceI[] { x07547 };
+ }
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find EMBL xrefs for Uniprot seqs and verify that
+ * - the EMBL xref'd sequences are retrieved without duplicates
+ * - mappings are added to the Uniprot dbrefs
+ * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
+ * alignment sequences
+ * - dbrefs on the EMBL sequences are added to the original dbrefs
+ */
+ SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
+ AlignmentI al = new Alignment(seqs);
+ Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
+ false);
+
+ /*
+ * verify retrieved sequences
+ */
+ assertNotNull(xrefs);
+ assertEquals(4, xrefs.getHeight());
+ assertSame(j03321, xrefs.getSequenceAt(0));
+ assertSame(x06707, xrefs.getSequenceAt(1));
+ assertSame(m19487, xrefs.getSequenceAt(2));
+ assertSame(x07547, xrefs.getSequenceAt(3));
+
+ /*
+ * verify mappings added to Uniprot-to-EMBL dbrefs
+ */
+ Mapping mapping = p0ce19.getDBRefs()[0].getMap();
+ assertSame(j03321, mapping.getTo());
+ mapping = p0ce19.getDBRefs()[1].getMap();
+ assertSame(x06707, mapping.getTo());
+ mapping = p0ce20.getDBRefs()[0].getMap();
+ assertSame(j03321, mapping.getTo());
+ mapping = p0ce20.getDBRefs()[1].getMap();
+ assertSame(x06707, mapping.getTo());
+
+ /*
+ * verify dbrefs on EMBL are mapped to alignment seqs
+ */
+ assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo());
+ assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo());
+ assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo());
+ assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo());
+
+ /*
+ * verify new dbref on EMBL dbref mapping is copied to the
+ * original Uniprot sequence
+ */
+ assertEquals(4, p0ce19.getDBRefs().length);
+ assertEquals("PIR", p0ce19.getDBRefs()[3].getSource());
+ assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId());
+ }
+
+ /**
+ * Tests for the static sameSequence comparison: null handling, case
+ * differences, gaps and differing residues
+ */
+ @Test(groups = "Functional")
+ public void testSameSequence()
+ {
+ // two nulls count as the same
+ assertTrue(CrossRef.sameSequence(null, null));
+
+ // null compared with a sequence does not, whichever side it is on
+ SequenceI reference = new Sequence("seq1", "ABCDEF");
+ assertFalse(CrossRef.sameSequence(reference, null));
+ assertFalse(CrossRef.sameSequence(null, reference));
+
+ // same residues match, regardless of name or case
+ SequenceI sameResidues = new Sequence("seq2", "ABCDEF");
+ assertTrue(CrossRef.sameSequence(reference, sameResidues));
+ SequenceI lowerCase = new Sequence("seq2", "abcdef");
+ assertTrue(CrossRef.sameSequence(reference, lowerCase));
+
+ // a gapped or truncated sequence does not match
+ SequenceI gapped = new Sequence("seq2", "ABCDE-F");
+ assertFalse(CrossRef.sameSequence(reference, gapped));
+ SequenceI truncated = new Sequence("seq2", "BCDEF");
+ assertFalse(CrossRef.sameSequence(reference, truncated));
+ }
}
* case insensitive matching
*/
assertTrue(testee.equals("a12345"));
+
+ testee = sequenceIdMatcher.new SeqIdName("UNIPROT|A12345");
+ assertFalse(testee.equals("A12345"));
+ assertFalse(testee.equals("UNIPROT|B98765"));
+ assertFalse(testee.equals("UNIPROT|"));
+ assertTrue(testee.equals("UNIPROT"));
}
}
assertArrayEquals(new int[] { 2, 2 },
acf.getMappedRegion(seq2, seq1, 6));
}
+
+ /**
+ * Tests for addMap, checking that adding an identical mapping twice leaves
+ * a single mapping in place. See also tests for MapList.addMapList
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMap()
+ {
+ AlignedCodonFrame codonFrame = new AlignedCodonFrame();
+ MapList codonMap = new MapList(new int[] { 2, 4, 6, 6, 8, 9 },
+ new int[] { 1, 2 }, 3, 1);
+
+ final Sequence dnaSeq = new Sequence("Seq1", "c-G-TA-gC-gT-T");
+ dnaSeq.createDatasetSequence();
+ final Sequence peptideSeq = new Sequence("Seq1", "-V-L");
+ peptideSeq.createDatasetSequence();
+
+ /*
+ * add the map once, and remember the resulting mapping
+ */
+ codonFrame.addMap(dnaSeq.getDatasetSequence(),
+ peptideSeq.getDatasetSequence(), codonMap);
+ assertEquals(1, codonFrame.getMappingsFromSequence(dnaSeq).size());
+ Mapping firstMapping = codonFrame.getMappingsFromSequence(dnaSeq)
+ .get(0);
+
+ /*
+ * add the same map again: still one mapping, and the same object
+ */
+ codonFrame.addMap(dnaSeq.getDatasetSequence(),
+ peptideSeq.getDatasetSequence(), codonMap);
+ assertEquals(1, codonFrame.getMappingsFromSequence(dnaSeq).size());
+ assertSame(firstMapping, codonFrame.getMappingsFromSequence(dnaSeq)
+ .get(0));
+ }
}
*
* @throws IOException
*/
- @Test(groups = { "Functional" }, enabled = false)
+ @Test(groups = { "Functional" }, enabled = true)
// TODO review / update this test after redesign of alignAs method
public void testAlignAs_cdnaAsProtein() throws IOException
{
*
* @throws IOException
*/
- @Test(groups = { "Functional" }, enabled = false)
+ @Test(groups = { "Functional" }, enabled = true)
// TODO review / update this test after redesign of alignAs method
public void testAlignAs_cdnaAsProtein_singleSequence() throws IOException
{
acf.addMap(seqFrom, seqTo, ml);
}
+ /*
+ * not sure whether mappings 'belong' to protein or nucleotide
+ * alignment, so adding to both ;~)
+ */
alFrom.addCodonFrame(acf);
+ alTo.addCodonFrame(acf);
}
/**
// TODO should the copy constructor copy the dataset?
// or make a new one referring to the same dataset sequences??
assertNull(copy.getDataset());
+ // TODO test metadata is copied when AlignmentI is a dataset
+
// assertArrayEquals(copy.getDataset().getSequencesArray(), protein
// .getDataset().getSequencesArray());
}
// TODO promote this method to AlignmentI
((Alignment) protein).createDatasetAlignment();
- // TODO this method should return AlignmentI not Alignment !!
- Alignment ds = protein.getDataset();
+ AlignmentI ds = protein.getDataset();
// side-effect: dataset created on second sequence
assertNotNull(protein.getSequenceAt(1).getDatasetSequence());
package jalview.datamodel;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertSame;
import jalview.util.MapList;
m = new Mapping(seq, fk);
assertEquals("[ [1, 6] [8, 13] ] 3:1 to [ [4, 7] ] Seq1", m.toString());
}
+
+ @Test(groups = { "Functional" })
+ public void testCopyConstructor()
+ {
+ MapList ml = new MapList(new int[] { 1, 6, 8, 13 }, new int[] { 4, 7 },
+ 3, 1);
+ SequenceI seq = new Sequence("seq1", "agtacg");
+ Mapping m = new Mapping(seq, ml);
+ m.setMappedFromId("abc");
+ Mapping copy = new Mapping(m);
+ assertEquals("abc", copy.getMappedFromId());
+ assertEquals(ml, copy.getMap());
+ assertSame(seq, copy.getTo());
+ }
}
assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
}
+ @Test(groups = ("Functional"))
+ public void testIsProtein()
+ {
+ // test Protein
+ assertTrue(new Sequence("prot","ASDFASDFASDF").isProtein());
+ // test DNA
+ assertFalse(new Sequence("prot","ACGTACGTACGT").isProtein());
+ // test RNA
+ SequenceI sq = new Sequence("prot","ACGUACGUACGU");
+ assertFalse(sq.isProtein());
+ // change sequence, should trigger an update of cached result
+ sq.setSequence("ASDFASDFADSF");
+ assertTrue(sq.isProtein());
+ /*
+ * in situ change of sequence doesn't change hashcode :-O
+ * (sequence should not expose internal implementation)
+ */
+ for (int i = 0; i < sq.getSequence().length; i++)
+ {
+ sq.getSequence()[i] = "acgtu".charAt(i % 5);
+ }
+ assertTrue(sq.isProtein()); // but it isn't
+ }
+
@Test(groups = { "Functional" })
public void testGetAnnotation()
{
}
/**
+ * Tests that createDatasetSequence gives a sequence with no dataset of its
+ * own a new dataset sequence, and returns that dataset sequence
+ */
+ @Test(groups = { "Functional" })
+ public void testCreateDatasetSequence()
+ {
+ SequenceI aligned = new Sequence("my", "ASDASD");
+ // no dataset sequence to start with
+ assertNull(aligned.getDatasetSequence());
+
+ SequenceI dataset = aligned.createDatasetSequence();
+ assertNotNull(dataset);
+ // the dataset sequence has no dataset sequence itself
+ assertNull(dataset.getDatasetSequence());
+ // and is now what the aligned sequence refers to
+ assertEquals(aligned.getDatasetSequence(), dataset);
+ }
+
+ /**
* Test for deriveSequence applied to a sequence with a dataset
*/
@Test(groups = { "Functional" })
package jalview.datamodel.xdb.embl;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.Sequence;
+import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceI;
+import jalview.util.MapList;
import java.util.ArrayList;
import java.util.Arrays;
EmblEntry testee = new EmblEntry();
/*
- * Make a (CDS) Feature with 4 locations
+ * Make a (CDS) Feature with 5 locations
*/
EmblFeature cds = new EmblFeature();
cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(110..120))");
public void testParseCodingFeature()
{
// not the whole sequence but enough for this test...
- SequenceI dna = new Sequence("J03321", "GGATCCGTAAGTTAGACGAAATT");
List<SequenceI> peptides = new ArrayList<SequenceI>();
SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
EmblFile ef = EmblTestHelper.getEmblFile();
+ assertEquals(1, ef.getEntries().size());
+ EmblEntry testee = ef.getEntries().get(0);
+ String sourceDb = "EMBL";
+ SequenceI dna = testee.makeSequence(sourceDb);
/*
- * parse two CDS features, one with two Uniprot cross-refs,
- * the other with one
+ * parse three CDS features, with two/one/no Uniprot cross-refs
*/
- EmblEntry testee = new EmblEntry();
for (EmblFeature feature : ef.getEntries().get(0).getFeatures())
{
if ("CDS".equals(feature.getName()))
{
- testee.parseCodingFeature(feature, "EMBL", dna, peptides, matcher);
+ testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher);
}
}
/*
 * peptides should now have six entries:
* EMBL product and two Uniprot accessions for the first CDS / translation
- * EMBL product and one Uniprot accession for the second CDS / translation
+ * EMBL product and one Uniprot accession for the second CDS / "
+ * EMBL product only for the third
*/
- assertEquals(5, peptides.size());
+ assertEquals(6, peptides.size());
assertEquals("CAA30420.1", peptides.get(0).getName());
assertEquals("MLCF", peptides.get(0).getSequenceAsString());
assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName());
assertEquals("MSSS", peptides.get(3).getSequenceAsString());
assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName());
assertEquals("MSSS", peptides.get(4).getSequenceAsString());
+ assertEquals("CAA12345.6", peptides.get(5).getName());
+ assertEquals("MSS", peptides.get(5).getSequenceAsString());
/*
- * verify dna sequence has dbrefs with mappings to the peptide 'products'
+ * verify dna sequence has dbrefs with CDS mappings to the peptide 'products'
*/
+ MapList cds1Map = new MapList(new int[] { 57, 46 }, new int[] { 1, 4 },
+ 3, 1);
+ MapList cds2Map = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 },
+ 3, 1);
+ MapList cds3Map = new MapList(new int[] { 4, 6, 10, 15 }, new int[] {
+ 1, 3 }, 3, 1);
DBRefEntry[] dbrefs = dna.getDBRefs();
- assertEquals(3, dbrefs.length);
+ assertEquals(4, dbrefs.length);
DBRefEntry dbRefEntry = dbrefs[0];
assertEquals("UNIPROT", dbRefEntry.getSource());
assertEquals("B0BCM4", dbRefEntry.getAccessionId());
assertSame(peptides.get(1), dbRefEntry.getMap().getTo());
- List<int[]> fromRanges = dbRefEntry.getMap().getMap().getFromRanges();
- assertEquals(1, fromRanges.size());
- assertEquals(57, fromRanges.get(0)[0]);
- assertEquals(46, fromRanges.get(0)[1]);
- List<int[]> toRanges = dbRefEntry.getMap().getMap().getToRanges();
- assertEquals(1, toRanges.size());
- assertEquals(1, toRanges.get(0)[0]);
- assertEquals(4, toRanges.get(0)[1]);
+ assertEquals(cds1Map, dbRefEntry.getMap().getMap());
dbRefEntry = dbrefs[1];
assertEquals("UNIPROT", dbRefEntry.getSource());
assertEquals("P0CE20", dbRefEntry.getAccessionId());
assertSame(peptides.get(2), dbRefEntry.getMap().getTo());
- fromRanges = dbRefEntry.getMap().getMap().getFromRanges();
- assertEquals(1, fromRanges.size());
- assertEquals(57, fromRanges.get(0)[0]);
- assertEquals(46, fromRanges.get(0)[1]);
- toRanges = dbRefEntry.getMap().getMap().getToRanges();
- assertEquals(1, toRanges.size());
- assertEquals(1, toRanges.get(0)[0]);
- assertEquals(4, toRanges.get(0)[1]);
+ assertEquals(cds1Map, dbRefEntry.getMap().getMap());
dbRefEntry = dbrefs[2];
assertEquals("UNIPROT", dbRefEntry.getSource());
assertEquals("B0BCM3", dbRefEntry.getAccessionId());
assertSame(peptides.get(4), dbRefEntry.getMap().getTo());
- fromRanges = dbRefEntry.getMap().getMap().getFromRanges();
- assertEquals(1, fromRanges.size());
- assertEquals(4, fromRanges.get(0)[0]);
- assertEquals(15, fromRanges.get(0)[1]);
- toRanges = dbRefEntry.getMap().getMap().getToRanges();
- assertEquals(1, toRanges.size());
- assertEquals(1, toRanges.get(0)[0]);
- assertEquals(4, toRanges.get(0)[1]);
+ assertEquals(cds2Map, dbRefEntry.getMap().getMap());
+
+ dbRefEntry = dbrefs[3];
+ assertEquals("EMBLCDSPROTEIN", dbRefEntry.getSource());
+ assertEquals("CAA12345.6", dbRefEntry.getAccessionId());
+ assertSame(peptides.get(5), dbRefEntry.getMap().getTo());
+ assertEquals(cds3Map, dbRefEntry.getMap().getMap());
+
+ /*
+ * verify peptides have dbrefs
+ * - to EMBL sequence (with inverse 1:3 cds mapping)
+ * - to EMBLCDS (with 1:3 mapping)
+ * - direct (no mapping) to other protein accessions
+ */
+ MapList proteinToCdsMap1 = new MapList(new int[] { 1, 4 }, new int[] {
+ 1, 12 }, 1, 3);
+ MapList proteinToCdsMap2 = new MapList(new int[] { 1, 3 }, new int[] {
+ 1, 9 }, 1, 3);
+
+ // dbrefs for first CDS EMBL product CAA30420.1
+ dbrefs = peptides.get(0).getDBRefs();
+ assertEquals(5, dbrefs.length);
+ assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
+ assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
+ assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
+ assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
+ assertEquals("CAA30420.1", dbrefs[1].getAccessionId());
+ assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
+ assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
+ assertEquals("CAA30420.1", dbrefs[2].getAccessionId());
+ assertNull(dbrefs[2].getMap());
+ assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
+ dbrefs[3]);
+ assertNull(dbrefs[3].getMap());
+ assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
+ dbrefs[4]);
+ assertNull(dbrefs[4].getMap());
+
+ // dbrefs for first CDS first Uniprot xref
+ dbrefs = peptides.get(1).getDBRefs();
+ assertEquals(2, dbrefs.length);
+ assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"),
+ dbrefs[0]);
+ assertNull(dbrefs[0].getMap());
+ assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
+ assertEquals("X07547", dbrefs[1].getAccessionId());
+ assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());
+
+ // dbrefs for first CDS second Uniprot xref
+ dbrefs = peptides.get(2).getDBRefs();
+ assertEquals(2, dbrefs.length);
+ assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"),
+ dbrefs[0]);
+ assertNull(dbrefs[0].getMap());
+ assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
+ assertEquals("X07547", dbrefs[1].getAccessionId());
+ assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap());
+
+ // dbrefs for second CDS EMBL product CAA30421.1
+ dbrefs = peptides.get(3).getDBRefs();
+ assertEquals(4, dbrefs.length);
+ assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
+ assertEquals("CAA30421.1", dbrefs[0].getAccessionId());
+ assertEquals(cds2Map.getInverse(), dbrefs[0].getMap().getMap());
+ assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
+ assertEquals("CAA30421.1", dbrefs[1].getAccessionId());
+ assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap());
+ assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
+ assertEquals("CAA30421.1", dbrefs[2].getAccessionId());
+ assertNull(dbrefs[2].getMap());
+ assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
+ dbrefs[3]);
+ assertNull(dbrefs[3].getMap());
+
+ // dbrefs for second CDS Uniprot xref
+ dbrefs = peptides.get(4).getDBRefs();
+ assertEquals(2, dbrefs.length);
+ assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"),
+ dbrefs[0]);
+ assertNull(dbrefs[0].getMap());
+ assertEquals(DBRefSource.EMBL, dbrefs[1].getSource());
+ assertEquals("X07547", dbrefs[1].getAccessionId());
+ assertEquals(cds2Map.getInverse(), dbrefs[1].getMap().getMap());
+
+ // dbrefs for third CDS inferred EMBL product CAA12345.6
+ dbrefs = peptides.get(5).getDBRefs();
+ assertEquals(3, dbrefs.length);
+ assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
+ assertEquals("CAA12345.6", dbrefs[0].getAccessionId());
+ assertEquals(cds3Map.getInverse(), dbrefs[0].getMap().getMap());
+ assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
+ assertEquals("CAA12345.6", dbrefs[1].getAccessionId());
+ assertEquals(proteinToCdsMap2, dbrefs[1].getMap().getMap());
+ assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource());
+ assertEquals("CAA12345.6", dbrefs[2].getAccessionId());
+ assertNull(dbrefs[2].getMap());
+ }
+
+ @Test(groups = "Functional")
+ public void testAdjustForProteinLength()
+ {
+ // three exon ranges 11-15, 21-25, 31-38 totalling 18 bases
+ int[] cdsRanges = new int[] { 11, 15, 21, 25, 31, 38 };
+
+ // 6 residues x 3 = 18 bases: the same array is handed back
+ assertSame(cdsRanges, EmblEntry.adjustForProteinLength(6, cdsRanges));
+
+ // 5 residues: also handed back as is, on the assumption that the exons
+ // include a stop codon which is not in the protein
+ assertSame(cdsRanges, EmblEntry.adjustForProteinLength(5, cdsRanges));
+
+ // 4 residues: final exon shortened by 6 bases
+ int[] shortenedLast = EmblEntry.adjustForProteinLength(4, cdsRanges);
+ assertEquals("[11, 15, 21, 25, 31, 32]",
+ Arrays.toString(shortenedLast));
+
+ // 3 residues: final exon dropped and the preceding one shortened by 1 base
+ int[] droppedLast = EmblEntry.adjustForProteinLength(3, cdsRanges);
+ assertEquals("[11, 15, 21, 24]", Arrays.toString(droppedLast));
+
+ // 4 residues where the first two exons hold exactly 12 bases:
+ // the final exon is removed whole
+ int[] evenSplit = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bases
+ int[] wholeExonRemoved = EmblEntry.adjustForProteinLength(4, evenSplit);
+ assertEquals("[11, 15, 21, 27]", Arrays.toString(wholeExonRemoved));
+
+ // 7 residues need more bases than the exons provide: array handed back
+ assertSame(evenSplit, EmblEntry.adjustForProteinLength(7, evenSplit));
 }
}
assertEquals("0", dbref.getVersion());
/*
- * two sequence features for CDS
+ * three sequence features for CDS
*/
- assertEquals(2, entry.getFeatures().size());
+ assertEquals(3, entry.getFeatures().size());
/*
* first CDS
*/
assertEquals("MSSS", q.getValues()[0]);
/*
+ * third CDS
+ */
+ ef = entry.getFeatures().get(2);
+ assertEquals("CDS", ef.getName());
+ assertEquals("join(4..6,10..15)", ef.getLocation());
+ assertNull(ef.getDbRefs());
+ assertEquals(2, ef.getQualifiers().size());
+ q = ef.getQualifiers().get(0);
+ assertEquals("protein_id", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("CAA12345.6", q.getValues()[0]);
+ q = ef.getQualifiers().get(1);
+ assertEquals("translation", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("MSS", q.getValues()[0]);
+
+ /*
* Sequence - verify newline not converted to space (JAL-2029)
*/
EmblSequence seq = entry.getSequence();
+ "<qualifier name=\"translation\"><value>MSSS</value></qualifier>"
+ "</feature>"
/*
+ * third CDS is made up - has no xref - code should synthesize
+ * one to an assumed EMBLCDSPROTEIN accession
+ */
+ + "<feature name=\"CDS\" location=\"join(4..6,10..15)\">"
+ + "<qualifier name=\"protein_id\"><value>CAA12345.6</value></qualifier>"
+ + "<qualifier name=\"translation\"><value>MSS</value></qualifier>"
+ + "</feature>"
+ /*
* sequence (modified for test purposes)
* emulates EMBL XML 1.2 which splits sequence data every 60 characters
* see EmblSequence.setSequence
import jalview.io.FileLoader;
import jalview.io.FormatAdapter;
import jalview.structure.StructureSelectionManager;
+import jalview.util.MapList;
import java.util.ArrayList;
import java.util.List;
AlignFrame af1 = new FileLoader().LoadFileWaitTillLoaded(
">Seq1\nCAGT\n", FormatAdapter.PASTE);
+ SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0);
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 1, 4 },
+ 1, 1));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 4, 1 },
+ 1, 1));
List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
mappings.add(acf1);
">Seq1\nRSVQ\n", FormatAdapter.PASTE);
AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded(
">Seq2\nDGEL\n", FormatAdapter.PASTE);
-
+ SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA");
+ SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA");
+ SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0);
+ SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0);
+ // need to be distinct
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf3 = new AlignedCodonFrame();
+ acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1,
+ 12 }, 1, 1));
List<AlignedCodonFrame> mappings1 = new ArrayList<AlignedCodonFrame>();
mappings1.add(acf1);
">Seq1\nRSVQ\n", FormatAdapter.PASTE);
AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded(
">Seq2\nDGEL\n", FormatAdapter.PASTE);
-
+ SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA");
+ SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA");
+ SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0);
+ SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0);
+ // need to be distinct
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 12 }, 1, 3));
AlignedCodonFrame acf3 = new AlignedCodonFrame();
+ acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1,
+ 12 }, 1, 1));
List<AlignedCodonFrame> mappings1 = new ArrayList<AlignedCodonFrame>();
mappings1.add(acf1);
public class UserColourSchemeTest
{
- @Test(groups = "functional")
+ @Test(groups = "Functional")
public void testGetColourFromString()
{
/*
import jalview.datamodel.SequenceI;
import jalview.io.FormatAdapter;
import jalview.io.StructureFile;
+import jalview.util.MapList;
import java.util.ArrayList;
import java.util.List;
public void testRegisterMapping()
{
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
ssm.registerMapping(acf1);
assertEquals(1, ssm.getSequenceMappings().size());
public void testRegisterMappings()
{
AlignedCodonFrame acf1 = new AlignedCodonFrame();
+ acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
AlignedCodonFrame acf2 = new AlignedCodonFrame();
+ acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
AlignedCodonFrame acf3 = new AlignedCodonFrame();
+ acf3.addMap(new Sequence("s3", "ttt"), new Sequence("p3", "p"),
+ new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1));
List<AlignedCodonFrame> set1 = new ArrayList<AlignedCodonFrame>();
set1.add(acf1);
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
+import java.util.List;
+
import org.testng.annotations.Test;
public class DBRefUtilsTest
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
1 }, 1, 1)));
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
ref2, ref3, ref4, ref5 }, target);
- assertEquals(3, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
- assertSame(ref5, matches[2]);
+ assertEquals(3, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref5, matches.get(2));
}
/**
new int[] { 1, 1 }, 2, 2));
ref3.setMap(map3);
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
ref2, ref3 }, target);
- assertEquals(2, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
+ assertEquals(2, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
}
/**
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
1 }, 1, 1)));
- DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
- ref2, ref3, ref4, ref5 }, "A1234");
- assertEquals(3, matches.length);
- assertSame(ref1, matches[0]);
- assertSame(ref2, matches[1]);
- assertSame(ref5, matches[2]);
+ DBRefEntry[] dbrefs = new DBRefEntry[] { ref1,
+ ref2, ref3, ref4, ref5 };
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(dbrefs, "A1234");
+ assertEquals(3, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref5, matches.get(2));
+ }
+
+ /**
+ * Test the method that searches for matches references - case when we are
+ * matching a reference with null (any) accession id
+ */
+ @Test(groups = { "Functional" })
+ public void testSearchRefs_wildcardAccessionid()
+ {
+ DBRefEntry target = new DBRefEntry("EMBL", "2", null);
+
+ DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches
+ // constructor changes embl to EMBL
+ DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1235"); // matches
+ // constructor does not upper-case accession id
+ DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1236"); // matches
+ DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match
+ // ref5 matches although it has a mapping - ignored
+ DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1237");
+ ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+ 1 }, 1, 1)));
+
+ List<DBRefEntry> matches = DBRefUtils.searchRefs(new DBRefEntry[] {
+ ref1,
+ ref2, ref3, ref4, ref5 }, target);
+ assertEquals(4, matches.size());
+ assertSame(ref1, matches.get(0));
+ assertSame(ref2, matches.get(1));
+ assertSame(ref3, matches.get(2));
+ assertSame(ref5, matches.get(3));
}
}
s);
}
+ /**
+ * Test that confirms adding a map twice does nothing
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMapList_sameMap()
+ {
+ MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 },
+ new int[] { 72, 22 }, 1, 3);
+ String before = ml.toString();
+ ml.addMapList(ml);
+ assertEquals(before, ml.toString());
+ ml.addMapList(new MapList(ml));
+ assertEquals(before, ml.toString());
+ }
+
@Test(groups = { "Functional" })
public void testAddMapList_contiguous()
{
assertEquals(0, result.size());
}
+ /**
+ * Tests findMappingsForSequenceAndOthers: the search for mappings involving
+ * a sequence is given a set of sequences (an alignment) to subselect the
+ * mappings returned
+ */
+ @Test(groups = { "Functional" })
+ public void testFindMappingsBetweenSequenceAndOthers()
+ {
+ SequenceI first = new Sequence("Seq1", "ABC");
+ SequenceI second = new Sequence("Seq2", "ABC");
+ SequenceI third = new Sequence("Seq3", "ABC");
+ SequenceI fourth = new Sequence("Seq4", "ABC");
+ first.createDatasetSequence();
+ second.createDatasetSequence();
+ third.createDatasetSequence();
+ fourth.createDatasetSequence();
+
+ /*
+ * One shared 1:1 map over positions 1-3, used for three mappings
+ * between dataset sequences: Seq1->Seq2, Seq2->Seq1 and Seq3->Seq1
+ */
+ MapList oneToOne = new MapList(new int[] { 1, 3 }, new int[] { 1, 3 },
+ 1, 1);
+ AlignedCodonFrame firstToSecond = new AlignedCodonFrame();
+ firstToSecond.addMap(first.getDatasetSequence(),
+ second.getDatasetSequence(), oneToOne);
+ AlignedCodonFrame secondToFirst = new AlignedCodonFrame();
+ secondToFirst.addMap(second.getDatasetSequence(),
+ first.getDatasetSequence(), oneToOne);
+ AlignedCodonFrame thirdToFirst = new AlignedCodonFrame();
+ thirdToFirst.addMap(third.getDatasetSequence(),
+ first.getDatasetSequence(), oneToOne);
+
+ List<AlignedCodonFrame> allMappings = new ArrayList<AlignedCodonFrame>();
+ allMappings.add(firstToSecond);
+ allMappings.add(secondToFirst);
+ allMappings.add(thirdToFirst);
+
+ /*
+ * Seq1 takes part in all three mappings, but only the two that also
+ * involve Seq2 should be found when the search is limited to {Seq1, Seq2}
+ */
+ Alignment subset = new Alignment(new SequenceI[] { first, second });
+ List<AlignedCodonFrame> found = MappingUtils
+ .findMappingsForSequenceAndOthers(first, allMappings, subset);
+ assertTrue(found.contains(firstToSecond));
+ assertTrue(found.contains(secondToFirst));
+ assertFalse("Did not expect to find mapping acf3 - subselect failed",
+ found.contains(thirdToFirst));
+ assertEquals(2, found.size());
+ }
+
@Test(groups = { "Functional" })
public void testMapEditCommand()
{
package jalview.ws;
+import jalview.analysis.CrossRef;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefSource;
// TODO: extracted from SequenceFetcher - convert to proper unit test with
// assertions
- AlignmentI ds = null;
- Vector<Object[]> noProds = new Vector<Object[]>();
String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
+ "With no arguments, all DbSources will be queried with their test Accession number.\n"
+ "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
{
List<DbSourceProxy> sps = new SequenceFetcher(withDas)
.getSourceProxy(argv[0]);
-
+
if (sps != null)
{
for (DbSourceProxy sp : sps)
AlignmentI al = null;
try
{
- al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
+ testRetrieval(argv[0], sp,
+ argv.length > 1 ? argv[1] : sp
.getTestQuery());
} catch (Exception e)
{
+ (argv.length > 1 ? argv[1] : sp.getTestQuery())
+ " from " + argv[0] + "\nUsage: " + usage);
}
- SequenceI[] prod = al.getSequencesArray();
- if (al != null)
- {
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true) + " : "
- + prod[p].getDescription());
- }
- }
}
return;
}
}
for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
{
- System.out.println("Source: " + sp.getDbName() + " (" + db
- + "): retrieving test:" + sp.getTestQuery());
- AlignmentI al = null;
- try
+ testRetrieval(db, sp, sp.getTestQuery());
+ }
+ }
+
+ }
+
+ private static void testRetrieval(String db, DbSourceProxy sp,
+ String testQuery)
+ {
+ AlignmentI ds = null;
+ Vector<Object[]> noProds = new Vector<Object[]>();
+ System.out.println("Source: " + sp.getDbName() + " (" + db
+ + "): retrieving test:" + sp.getTestQuery());
+ {
+ AlignmentI al = null;
+ try
+ {
+ al = sp.getSequenceRecords(testQuery);
+ if (al != null && al.getHeight() > 0)
{
- al = sp.getSequenceRecords(sp.getTestQuery());
- if (al != null && al.getHeight() > 0)
+ boolean dna = sp.isDnaCoding();
+ al.setDataset(null);
+ AlignmentI alds = al.getDataset();
+ // try and find products
+ CrossRef crossRef = new CrossRef(al.getSequencesArray(), alds);
+ List<String> types = crossRef.findXrefSourcesForSequences(dna);
+ if (types != null)
{
- boolean dna = sp.isDnaCoding();
- // try and find products
- String types[] = jalview.analysis.CrossRef
- .findSequenceXrefTypes(dna, al.getSequencesArray());
- if (types != null)
+ System.out.println("Xref Types for: " + (dna ? "dna" : "prot"));
+ for (String source : types)
{
- System.out.println("Xref Types for: "
- + (dna ? "dna" : "prot"));
- for (int t = 0; t < types.length; t++)
+ System.out.println("Type: " + source);
+ SequenceI[] prod = crossRef.findXrefSequences(source, dna)
+ .getSequencesArray();
+ System.out.println("Found "
+ + ((prod == null) ? "no" : "" + prod.length)
+ + " products");
+ if (prod != null)
{
- System.out.println("Type: " + types[t]);
- SequenceI[] prod = jalview.analysis.CrossRef
- .findXrefSequences(al.getSequencesArray(), dna,
- types[t], null)
- .getSequencesArray();
- System.out.println("Found "
- + ((prod == null) ? "no" : "" + prod.length)
- + " products");
- if (prod != null)
+ for (int p = 0; p < prod.length; p++)
{
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true));
- }
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true));
}
}
}
- else
- {
- noProds.addElement((dna ? new Object[] { al, al }
- : new Object[] { al }));
- }
-
- }
- } catch (Exception ex)
- {
- System.out.println("ERROR:Failed to retrieve test query.");
- ex.printStackTrace(System.out);
- }
-
- if (al == null)
- {
- System.out.println("ERROR:No alignment retrieved.");
- StringBuffer raw = sp.getRawRecords();
- if (raw != null)
- {
- System.out.println(raw.toString());
}
else
{
- System.out.println("ERROR:No Raw results.");
+ noProds.addElement((dna ? new Object[] { al, al }
+ : new Object[] { al }));
}
+
+ }
+ } catch (Exception ex)
+ {
+ System.out.println("ERROR:Failed to retrieve test query.");
+ ex.printStackTrace(System.out);
+ }
+
+ if (al == null)
+ {
+ System.out.println("ERROR:No alignment retrieved.");
+ StringBuffer raw = sp.getRawRecords();
+ if (raw != null)
+ {
+ System.out.println(raw.toString());
}
else
{
- System.out.println("Retrieved " + al.getHeight() + " sequences.");
- for (int s = 0; s < al.getHeight(); s++)
- {
- SequenceI sq = al.getSequenceAt(s);
- while (sq.getDatasetSequence() != null)
- {
- sq = sq.getDatasetSequence();
-
- }
- if (ds == null)
- {
- ds = new Alignment(new SequenceI[] { sq });
-
- }
- else
- {
- ds.addSequence(sq);
- }
- }
+ System.out.println("ERROR:No Raw results.");
+ }
+ }
+ else
+ {
+ System.out.println("Retrieved " + al.getHeight() + " sequences.");
+ if (ds == null)
+ {
+ ds = al.getDataset();
+ }
+ else
+ {
+ ds.append(al.getDataset());
+ al.setDataset(ds);
}
- System.out.flush();
- System.err.flush();
-
}
- if (noProds.size() > 0)
+ System.out.flush();
+ System.err.flush();
+ }
+ if (noProds.size() > 0)
+ {
+ Enumeration<Object[]> ts = noProds.elements();
+ while (ts.hasMoreElements())
+
{
- Enumeration<Object[]> ts = noProds.elements();
- while (ts.hasMoreElements())
-
+ Object[] typeSq = ts.nextElement();
+ boolean dna = (typeSq.length > 1);
+ AlignmentI al = (AlignmentI) typeSq[0];
+ System.out.println("Trying getProducts for "
+ + al.getSequenceAt(0).getDisplayId(true));
+ System.out.println("Search DS Xref for: " + (dna ? "dna" : "prot"));
+ // have a bash at finding the products amongst all the retrieved
+ // sequences.
+ SequenceI[] seqs = al.getSequencesArray();
+ Alignment prodal = new CrossRef(seqs, ds).findXrefSequences(null,
+ dna);
+ System.out.println("Found "
+ + ((prodal == null) ? "no" : "" + prodal.getHeight())
+ + " products");
+ if (prodal != null)
{
- Object[] typeSq = ts.nextElement();
- boolean dna = (typeSq.length > 1);
- AlignmentI al = (AlignmentI) typeSq[0];
- System.out.println("Trying getProducts for "
- + al.getSequenceAt(0).getDisplayId(true));
- System.out.println("Search DS Xref for: "
- + (dna ? "dna" : "prot"));
- // have a bash at finding the products amongst all the retrieved
- // sequences.
- SequenceI[] seqs = al.getSequencesArray();
- Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
- seqs, dna, null, ds);
- System.out.println("Found "
- + ((prodal == null) ? "no" : "" + prodal.getHeight())
- + " products");
- if (prodal != null)
+ SequenceI[] prod = prodal.getSequencesArray(); // note
+ // should
+ // test
+ // rather
+ // than
+ // throw
+ // away
+ // codon
+ // mapping
+ // (if
+ // present)
+ for (int p = 0; p < prod.length; p++)
{
- SequenceI[] prod = prodal.getSequencesArray(); // note
- // should
- // test
- // rather
- // than
- // throw
- // away
- // codon
- // mapping
- // (if
- // present)
- for (int p = 0; p < prod.length; p++)
- {
- System.out.println("Prod " + p + ": "
- + prod[p].getDisplayId(true));
- }
+ System.out.println("Prod " + p + ": "
+ + prod[p].getDisplayId(true));
}
}
-
}
-
}
}
-
}
.getMap().getMappedWidth(), 1);
assertEquals("Expected local reference map to be 3 nucleotides", dr[0]
.getMap().getWidth(), 3);
- AlignmentI sprods = CrossRef.findXrefSequences(
- alsq.getSequencesArray(), true, dr[0].getSource(), alsq);
+ AlignmentI sprods = new CrossRef(alsq.getSequencesArray(), alsq)
+ .findXrefSequences(dr[0].getSource(), true);
assertNotNull(
"Couldn't recover cross reference sequence from dataset. Was it ever added ?",
sprods);