import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
Collection<String> types, List<SequenceI> forSequences,
boolean anyType, boolean doShow)
{
- for (AlignmentAnnotation aa : al.getAlignmentAnnotation())
+ AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
+ if (anns != null)
{
- if (anyType || types.contains(aa.label))
+ for (AlignmentAnnotation aa : anns)
{
- if ((aa.sequenceRef != null)
- && (forSequences == null || forSequences
- .contains(aa.sequenceRef)))
+ if (anyType || types.contains(aa.label))
{
- aa.visible = doShow;
+ if ((aa.sequenceRef != null)
+ && (forSequences == null || forSequences
+ .contains(aa.sequenceRef)))
+ {
+ aa.visible = doShow;
+ }
}
}
}
*
* @param dna
* aligned dna sequences
- * @param mappings
- * from dna to protein
- * @param al
+ * @param dataset
+ * - throws error if not given a dataset
+ * @param products
+ * (optional) to restrict results to CDS that map to specified
+ * protein products
* @return an alignment whose sequences are the cds-only parts of the dna
* sequences (or null if no mappings are found)
*/
public static AlignmentI makeCdsAlignment(SequenceI[] dna,
- List<AlignedCodonFrame> mappings, AlignmentI al)
+ AlignmentI dataset, AlignmentI products)
{
+ if (dataset.getDataset() != null)
+ {
+ throw new Error(
+ "IMPLEMENTATION ERROR: dataset.getDataset() must be null!");
+ }
List<SequenceI> cdsSeqs = new ArrayList<SequenceI>();
-
+ List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
+ HashSet<SequenceI> productSeqs = null;
+ if (products != null)
+ {
+ productSeqs = new HashSet<SequenceI>();
+ for (SequenceI seq : products.getSequences())
+ {
+ productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence());
+ }
+ }
+
+ /*
+ * construct CDS sequences from the (cds-to-protein) mappings made earlier;
+ * this makes it possible to model multiple products from dna (e.g. EMBL);
+ * however it does mean we don't have the EMBL protein_id (a property on
+ * the CDS features) in order to make the CDS sequence name :-(
+ */
for (SequenceI seq : dna)
{
- AlignedCodonFrame cdsMappings = new AlignedCodonFrame();
+ SequenceI seqDss = seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence();
List<AlignedCodonFrame> seqMappings = MappingUtils
.findMappingsForSequence(seq, mappings);
- List<AlignedCodonFrame> alignmentMappings = al.getCodonFrames();
for (AlignedCodonFrame mapping : seqMappings)
{
- for (Mapping aMapping : mapping.getMappingsFromSequence(seq))
+ List<Mapping> mappingsFromSequence = mapping.getMappingsFromSequence(seq);
+
+ for (Mapping aMapping : mappingsFromSequence)
{
- SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(),
- aMapping);
+ if (aMapping.getMap().getFromRatio() == 1)
+ {
+ /*
+ * not a dna-to-protein mapping (likely dna-to-cds)
+ */
+ continue;
+ }
+
+ /*
+ * check for an existing CDS sequence i.e. a 3:1 mapping to
+ * the dna mapping's product
+ */
+ SequenceI cdsSeq = null;
+
+ // TODO better mappings collection data model so we can do
+ // a direct lookup instead of double loops to find mappings
+
+ SequenceI proteinProduct = aMapping.getTo();
+
+ /*
+ * skip if not mapped to one of a specified set of proteins
+ */
+ if (productSeqs != null && !productSeqs.contains(proteinProduct))
+ {
+ continue;
+ }
+
+ for (AlignedCodonFrame acf : MappingUtils
+ .findMappingsForSequence(proteinProduct, mappings))
+ {
+ for (SequenceToSequenceMapping map : acf.getMappings())
+ {
+ if (map.getMapping().getMap().getFromRatio() == 3
+ && proteinProduct == map.getMapping().getTo()
+ && seqDss != map.getFromSeq())
+ {
+ /*
+ * found a 3:1 mapping to the protein product which is not
+ * from the dna sequence...assume it is from the CDS sequence
+ * TODO mappings data model that brings together related
+ * dna-cds-protein mappings in one object
+ */
+ cdsSeq = map.getFromSeq();
+ }
+ }
+ }
+ if (cdsSeq != null)
+ {
+ /*
+ * mappings are always to dataset sequences so create an aligned
+ * sequence to own it; add the dataset sequence to the dataset
+ */
+ SequenceI derivedSequence = cdsSeq.deriveSequence();
+ cdsSeqs.add(derivedSequence);
+ if (!dataset.getSequences().contains(cdsSeq))
+ {
+ dataset.addSequence(cdsSeq);
+ }
+ continue;
+ }
+
+ /*
+ * didn't find mapped CDS sequence - construct it and add
+ * its dataset sequence to the dataset
+ */
+ cdsSeq = makeCdsSequence(seq.getDatasetSequence(), aMapping);
+ SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
cdsSeqs.add(cdsSeq);
-
+ if (!dataset.getSequences().contains(cdsSeqDss))
+ {
+ dataset.addSequence(cdsSeqDss);
+ }
+
/*
* add a mapping from CDS to the (unchanged) mapped to range
*/
MapList map = new MapList(cdsRange, aMapping.getMap()
.getToRanges(), aMapping.getMap().getFromRatio(),
aMapping.getMap().getToRatio());
- cdsMappings.addMap(cdsSeq, aMapping.getTo(), map);
+ AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+ cdsToProteinMapping.addMap(cdsSeq, proteinProduct, map);
+
+ /*
+ * guard against duplicating the mapping if repeating this action
+ */
+ if (!mappings.contains(cdsToProteinMapping))
+ {
+ mappings.add(cdsToProteinMapping);
+ }
/*
* add another mapping from original 'from' range to CDS
*/
+ AlignedCodonFrame dnaToProteinMapping = new AlignedCodonFrame();
map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1,
1);
- cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map);
+ dnaToProteinMapping.addMap(seq.getDatasetSequence(), cdsSeq, map);
+ if (!mappings.contains(dnaToProteinMapping))
+ {
+ mappings.add(dnaToProteinMapping);
+ }
- alignmentMappings.add(cdsMappings);
/*
* transfer any features on dna that overlap the CDS
}
}
- /*
- * add CDS seqs to shared dataset
- */
- Alignment dataset = al.getDataset();
- for (SequenceI seq : cdsSeqs)
- {
- if (!dataset.getSequences().contains(seq.getDatasetSequence()))
- {
- dataset.addSequence(seq.getDatasetSequence());
- }
- }
AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
.size()]));
cds.setDataset(dataset);
*
* @param seq
* @param mapping
- * @return
+ * @return CDS sequence (as a dataset sequence)
*/
static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
{
SequenceI newSeq = new Sequence(seq.getName() + "|"
+ mapping.getTo().getName(), newSeqChars, 1, newPos);
- newSeq.createDatasetSequence();
return newSeq;
}
* sort to get sequence features in start position order
* - would be better to store in Sequence as a TreeSet or NCList?
*/
- Arrays.sort(peptide.getSequenceFeatures(),
- new Comparator<SequenceFeature>()
- {
- @Override
- public int compare(SequenceFeature o1, SequenceFeature o2)
+ if (peptide.getSequenceFeatures() != null)
+ {
+ Arrays.sort(peptide.getSequenceFeatures(),
+ new Comparator<SequenceFeature>()
{
- int c = Integer.compare(o1.getBegin(), o2.getBegin());
- return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
- : c;
- }
- });
+ @Override
+ public int compare(SequenceFeature o1, SequenceFeature o2)
+ {
+ int c = Integer.compare(o1.getBegin(), o2.getBegin());
+ return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
+ : c;
+ }
+ });
+ }
return count;
}
// set score to 0f so 'graduated colour' option is offered! JAL-2060
SequenceFeature sf = new SequenceFeature(
SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
- peptidePos, 0f, null);
+ peptidePos, 0f, "Jalview");
StringBuilder attributes = new StringBuilder(32);
String id = (String) var.variant.getValue(ID);
if (id != null)
*
* @param seqs
* @param xrefs
+ * @param dataset
+ * alignment dataset shared by the new copy
* @return
*/
public static AlignmentI makeCopyAlignment(SequenceI[] seqs,
- SequenceI[] xrefs)
+ SequenceI[] xrefs, AlignmentI dataset)
{
AlignmentI copy = new Alignment(new Alignment(seqs));
+ copy.setDataset(dataset);
SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
if (xrefs != null)
{
/*
* Map will hold, for each aligned column position, a map of
- * {unalignedSequence, sequenceCharacter} at that position.
+ * {unalignedSequence, characterPerSequence} at that position.
* TreeMap keeps the entries in ascending column order.
*/
Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
/*
- * r any sequences that have no mapping so can't be realigned
+ * record any sequences that have no mapping so can't be realigned
*/
unmapped.addAll(unaligned.getSequences());
return false;
}
+ /*
+ * invert mapping if it is from unaligned to aligned sequence
+ */
+ if (seqMap.getTo() == fromSeq.getDatasetSequence())
+ {
+ seqMap = new Mapping(seq.getDatasetSequence(), seqMap.getMap()
+ .getInverse());
+ }
+
char[] fromChars = fromSeq.getSequence();
int toStart = seq.getStart();
char[] toChars = seq.getSequence();
* of the next character of the mapped-to sequence; stop when all
* the characters of the range have been counted
*/
- while (mappedCharPos <= range[1])
+ while (mappedCharPos <= range[1] && fromCol <= fromChars.length
+ && fromCol >= 0)
{
if (!Comparison.isGap(fromChars[fromCol - 1]))
{