*/
package jalview.analysis;
-import jalview.datamodel.AlignedCodon;
-import jalview.datamodel.AlignedCodonFrame;
-import jalview.datamodel.AlignmentAnnotation;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.Mapping;
-import jalview.datamodel.SearchResults;
-import jalview.datamodel.Sequence;
-import jalview.datamodel.SequenceI;
-import jalview.schemes.ResidueProperties;
-import jalview.util.MapList;
-
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
+import jalview.datamodel.AlignedCodon;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.SearchResults;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceGroup;
+import jalview.datamodel.SequenceI;
+import jalview.schemes.ResidueProperties;
+import jalview.util.MapList;
+
/**
* grab bag of useful alignment manipulation operations Expect these to be
* refactored elsewhere at some point.
{
/**
- * Represents the 3 possible results of trying to map one alignment to
- * another.
- */
- public enum MappingResult
- {
- Mapped, NotMapped, AlreadyMapped
- }
-
- /**
* given an existing alignment, create a new alignment including all, or up to
* flankSize additional symbols from each sequence's dataset sequence
*
/**
* Build mapping of protein to cDNA alignment. Mappings are made between
* sequences where the cDNA translates to the protein sequence. Any new
- * mappings are added to the protein alignment. Has a 3-valued result: either
- * Mapped (at least one sequence mapping was created), AlreadyMapped (all
- * possible sequence mappings already exist), or NotMapped (no possible
- * sequence mappings exist).
+ * mappings are added to the protein alignment. Returns true if any mappings
+ * either already exist or were added, else false.
+ * Mapping is attempted in two passes: first between sequences that
+ * cross-reference each other, then between the sequences left unmapped.
+ * Returns false straight away if either alignment is null.
*
* @param proteinAlignment
+ * the protein alignment, which receives any new mappings
* @param cdnaAlignment
+ * the cDNA alignment whose sequences are matched to the proteins
* @return
+ * true if either mapping pass reports a mapping, else false
*/
- public static MappingResult mapProteinToCdna(
+ public static boolean mapProteinToCdna(
final AlignmentI proteinAlignment,
final AlignmentI cdnaAlignment)
{
if (proteinAlignment == null || cdnaAlignment == null)
{
- return MappingResult.NotMapped;
+ return false;
}
- boolean mappingPossible = false;
- boolean mappingPerformed = false;
+ Set<SequenceI> mappedDna = new HashSet<SequenceI>();
+ Set<SequenceI> mappedProtein = new HashSet<SequenceI>();
- List<SequenceI> mapped = new ArrayList<SequenceI>();
+ /*
+ * First pass - map sequences where cross-references exist. This includes
+ * 1-to-many mappings to support, for example, variant cDNA.
+ *
+ * The mappedDna and mappedProtein sets are shared with the second pass,
+ * which uses them to skip sequences that were already paired up here.
+ */
+ boolean mappingPerformed = mapProteinToCdna(proteinAlignment,
+ cdnaAlignment, mappedDna, mappedProtein, true);
+ /*
+ * Second pass - map sequences where no cross-references exist. This only
+ * does 1-to-1 mappings and assumes corresponding sequences are in the same
+ * order in the alignments.
+ */
+ mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,
+ mappedDna, mappedProtein, false);
+ return mappingPerformed;
+ }
+
+ /**
+ * Make mappings between compatible sequences (where the cDNA translation
+ * matches the protein). Performs a single pass over every protein / cDNA
+ * sequence pair; the kind of pass is selected by xrefsOnly. Mappings made
+ * for one protein sequence are collected in one codon frame, which is added
+ * to the protein alignment only if that protein was mapped in this pass.
+ *
+ * @param proteinAlignment
+ * the protein alignment; receives a codon frame for each protein
+ * sequence mapped in this pass
+ * @param cdnaAlignment
+ * the cDNA alignment whose sequences are tried against each protein
+ * @param mappedDna
+ * a set of mapped DNA sequences (to add to)
+ * @param mappedProtein
+ * a set of mapped Protein sequences (to add to)
+ * @param xrefsOnly
+ * if true, only map sequences where xrefs exist; if false, skip any
+ * protein or cDNA sequence already present in the mapped sets
+ * @return true if a mapping was added during this pass, else false
+ * (NOTE(review): part of the loop body is not visible in this
+ * change - confirm whether pre-existing mappings also count)
+ */
+ protected static boolean mapProteinToCdna(
+ final AlignmentI proteinAlignment,
+ final AlignmentI cdnaAlignment, Set<SequenceI> mappedDna,
+ Set<SequenceI> mappedProtein, boolean xrefsOnly)
+ {
+ boolean mappingPerformed = false;
List<SequenceI> thisSeqs = proteinAlignment.getSequences();
-
for (SequenceI aaSeq : thisSeqs)
{
+ boolean proteinMapped = false;
AlignedCodonFrame acf = new AlignedCodonFrame();
for (SequenceI cdnaSeq : cdnaAlignment.getSequences())
{
/*
- * Heuristic rule: don't map more than one AA sequence to the same cDNA;
- * map progressively assuming that alignments have mappable sequences in
- * the same respective order
+ * Always try to map if sequences have xref to each other; this supports
+ * variant cDNA or alternative splicing for a protein sequence.
+ *
+ * If no xrefs, try to map progressively, assuming that alignments have
+ * mappable sequences in corresponding order. These are not
+ * many-to-many, as that would risk mixing species with similar cDNA
+ * sequences.
*/
- if (mapped.contains(cdnaSeq))
+ if (xrefsOnly && !CrossRef.haveCrossRef(aaSeq, cdnaSeq))
+ {
+ continue;
+ }
+
+ /*
+ * Don't map non-xrefd sequences more than once each. This heuristic
+ * allows us to pair up similar sequences in ordered alignments.
+ */
+ if (!xrefsOnly
+ && (mappedProtein.contains(aaSeq) || mappedDna
+ .contains(cdnaSeq)))
{
continue;
}
{
acf.addMap(cdnaSeq, aaSeq, map);
mappingPerformed = true;
- mapped.add(cdnaSeq);
-
- /*
- * Heuristic rule #2: don't map AA sequence to more than one cDNA
- */
- break;
+ proteinMapped = true;
+ mappedDna.add(cdnaSeq);
+ mappedProtein.add(aaSeq);
}
}
}
- proteinAlignment.addCodonFrame(acf);
- }
-
- /*
- * If at least one mapping was possible but none was done, then the
- * alignments are already as mapped as they can be.
- */
- if (mappingPossible && !mappingPerformed)
- {
- return MappingResult.AlreadyMapped;
- }
- else
- {
- return mappingPerformed ? MappingResult.Mapped
- : MappingResult.NotMapped;
+ if (proteinMapped)
+ {
+ proteinAlignment.addCodonFrame(acf);
+ }
}
+ return mappingPerformed;
}
/**
* ? allow X in protein to match untranslatable in dna ?
*/
final char aaRes = aaSeqChars[aaResidue];
- if (translated == null && aaRes == 'X')
+ if ((translated == null || "STOP".equals(translated)) && aaRes == 'X')
{
continue;
}
if (translated == null
|| !(aaRes == translated.charAt(0)))
{
+ // debug
+ // System.out.println(("Mismatch at " + i + "/" + aaResidue + ": "
+ // + codon + "(" + translated + ") != " + aaRes));
return false;
}
}
*/
return mapProteinToCdna(proteinDs, dnaDs) != null;
}
+
+  /**
+   * Works out which reference annotations could still be added to the
+   * alignment for the given sequences. For every sequence in scope, each
+   * annotation held on its dataset sequence is looked up on the alignment (by
+   * sequence, calcId and label); those with no match are collected in the
+   * 'candidates' map under that sequence. A sequence with no dataset sequence,
+   * no dataset annotations, or nothing left to add gets no entry.
+   *
+   * @param sequenceScope
+   *          the sequences to scan for reference annotations; if null, nothing
+   *          is done
+   * @param labelForCalcId
+   *          (optional, may be null) lookup populated with the label of each
+   *          addable annotation, keyed by its calcId, e.g. for tooltips
+   * @param candidates
+   *          map to populate with the addable annotations for each sequence
+   * @param al
+   *          the alignment to check for presence of annotations
+   */
+  public static void findAddableReferenceAnnotations(
+          List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,
+          final Map<SequenceI, List<AlignmentAnnotation>> candidates,
+          AlignmentI al)
+  {
+    if (sequenceScope != null)
+    {
+      for (SequenceI scoped : sequenceScope)
+      {
+        final SequenceI ds = scoped.getDatasetSequence();
+        if (ds != null)
+        {
+          final AlignmentAnnotation[] onDataset = ds.getAnnotation();
+          if (onDataset != null)
+          {
+            final List<AlignmentAnnotation> addable = new ArrayList<AlignmentAnnotation>();
+            for (int i = 0; i < onDataset.length; i++)
+            {
+              final AlignmentAnnotation refAnn = onDataset[i];
+
+              // an annotation is addable only if the alignment has no
+              // annotation for this sequence with the same calcId and label
+              final boolean onAlignment = al
+                      .findAnnotations(scoped, refAnn.getCalcId(),
+                              refAnn.label).iterator().hasNext();
+              if (onAlignment)
+              {
+                continue;
+              }
+              addable.add(refAnn);
+              if (labelForCalcId != null)
+              {
+                labelForCalcId.put(refAnn.getCalcId(), refAnn.label);
+              }
+            }
+
+            // only record sequences that actually have something to add
+            if (addable.size() > 0)
+            {
+              candidates.put(scoped, addable);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Copies each supplied annotation onto the alignment and makes the copy
+   * visible. Each copy is restricted to the column range of the selection
+   * group, or to the full length of the original annotation when there is no
+   * selection group, and is then adjusted for gaps. The copy is also added to
+   * its sequence, unless the sequence already has the original annotation.
+   *
+   * @param annotations
+   *          the annotations to add, keyed by the sequence they relate to
+   * @param alignment
+   *          the alignment to add them to
+   * @param selectionGroup
+   *          current selection group (or null if none)
+   */
+  public static void addReferenceAnnotations(
+          Map<SequenceI, List<AlignmentAnnotation>> annotations,
+          final AlignmentI alignment, final SequenceGroup selectionGroup)
+  {
+    for (Map.Entry<SequenceI, List<AlignmentAnnotation>> entry : annotations
+            .entrySet())
+    {
+      final SequenceI target = entry.getKey();
+      for (AlignmentAnnotation source : entry.getValue())
+      {
+        final AlignmentAnnotation copy = new AlignmentAnnotation(source);
+
+        // column range: the selection if there is one, else the whole row
+        final int fullLength = source.annotations.length;
+        final boolean wholeRow = (selectionGroup == null);
+        final int from = wholeRow ? 0 : selectionGroup.getStartRes();
+        final int to = wholeRow ? fullLength : selectionGroup.getEndRes();
+        copy.restrict(from, to);
+
+        /*
+         * Attach the copy to the sequence (sets its datasetSequence), but not
+         * if the sequence already carries the original annotation.
+         */
+        final boolean alreadyOnSequence = target.hasAnnotation(source);
+        if (!alreadyOnSequence)
+        {
+          target.addAlignmentAnnotation(copy);
+        }
+
+        // adjust for gaps, then add to the alignment and set visible
+        copy.adjustForAlignment();
+        alignment.addAnnotation(copy);
+        copy.visible = true;
+      }
+    }
+  }
+
+  /**
+   * Set visibility of alignment annotations of specified types (labels), for
+   * specified sequences. This supports controls like
+   * "Show all secondary structure", "Hide all Temp factor", etc. Only
+   * annotations linked to a sequence are touched. An alignment with no
+   * annotations is left unchanged.
+   *
+   * @param al
+   *          the alignment to scan for annotations
+   * @param types
+   *          the types (labels) of annotations to be updated
+   * @param forSequences
+   *          if not null, only annotations linked to one of these sequences are
+   *          in scope for update; if null, acts on all sequence annotations
+   * @param anyType
+   *          if this flag is true, 'types' is ignored (label not checked)
+   * @param doShow
+   *          if true, set visibility on, else set off
+   */
+  public static void showOrHideSequenceAnnotations(AlignmentI al,
+          Collection<String> types, List<SequenceI> forSequences,
+          boolean anyType, boolean doShow)
+  {
+    final AlignmentAnnotation[] rows = al.getAlignmentAnnotation();
+    if (rows == null)
+    {
+      // the alignment holds no annotations, so there is nothing to update
+      return;
+    }
+
+    for (AlignmentAnnotation aa : rows)
+    {
+      // label must match, unless the caller asked for any type
+      if (anyType || types.contains(aa.label))
+      {
+        // must be a sequence annotation, and for a sequence in scope
+        if ((aa.sequenceRef != null)
+                && (forSequences == null || forSequences
+                        .contains(aa.sequenceRef)))
+        {
+          aa.visible = doShow;
+        }
+      }
+    }
+  }
}