import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SearchResults;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.util.MapList;
}
return gapsToAdd;
}
+
+ /**
+ * Returns a list of sequences mapped from the given sequences and aligned
+ * (gapped) in the same way. For example, the cDNA for aligned protein, where
+ * a single gap in protein generates three gaps in cDNA.
+ *
+ * @param sequences
+ * @param gapCharacter
+ * @param mappings
+ * @return
+ */
+ public static List<SequenceI> getAlignedTranslation(
+ List<SequenceI> sequences, char gapCharacter,
+ Set<AlignedCodonFrame> mappings)
+ {
+ List<SequenceI> alignedSeqs = new ArrayList<SequenceI>();
+
+ for (SequenceI seq : sequences)
+ {
+ List<SequenceI> mapped = getAlignedTranslation(seq, gapCharacter,
+ mappings);
+ alignedSeqs.addAll(mapped);
+ }
+ return alignedSeqs;
+ }
+
+ /**
+ * Returns sequences aligned 'like' the source sequence, as mapped by the
+ * given mappings. Normally we expect zero or one 'mapped' sequences, but this
+ * will support 1-to-many as well.
+ *
+ * @param seq
+ * @param gapCharacter
+ * @param mappings
+ * @return
+ */
+ protected static List<SequenceI> getAlignedTranslation(SequenceI seq,
+ char gapCharacter, Set<AlignedCodonFrame> mappings)
+ {
+ List<SequenceI> result = new ArrayList<SequenceI>();
+ for (AlignedCodonFrame mapping : mappings)
+ {
+ if (mapping.involvesSequence(seq))
+ {
+ SequenceI mapped = getAlignedTranslation(seq, gapCharacter, mapping);
+ if (mapped != null)
+ {
+ result.add(mapped);
+ }
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Returns the translation of 'seq' (as held in the mapping) with
+ * corresponding alignment (gaps).
+ *
+ * @param seq
+ * @param gapCharacter
+ * @param mapping
+ * @return
+ */
+ protected static SequenceI getAlignedTranslation(SequenceI seq,
+ char gapCharacter, AlignedCodonFrame mapping)
+ {
+ String gap = String.valueOf(gapCharacter);
+ boolean toDna = false;
+ int fromRatio = 1;
+ SequenceI mapTo = mapping.getDnaForAaSeq(seq);
+ if (mapTo != null)
+ {
+ // mapping is from protein to nucleotide
+ toDna = true;
+ // should ideally get gap count ratio from mapping
+ gap = String.valueOf(new char[]
+ { gapCharacter, gapCharacter, gapCharacter });
+ }
+ else
+ {
+ // mapping is from nucleotide to protein
+ mapTo = mapping.getAaForDnaSeq(seq);
+ fromRatio = 3;
+ }
+ StringBuilder newseq = new StringBuilder(seq.getLength()
+ * (toDna ? 3 : 1));
+
+ int residueNo = 0; // in seq, base 1
+ int[] phrase = new int[fromRatio];
+ int phraseOffset = 0;
+ int gapWidth = 0;
+ boolean first = true;
+ final Sequence alignedSeq = new Sequence("", "");
+
+ for (char c : seq.getSequence())
+ {
+ if (c == gapCharacter)
+ {
+ gapWidth++;
+ if (gapWidth >= fromRatio)
+ {
+ newseq.append(gap);
+ gapWidth = 0;
+ }
+ }
+ else
+ {
+ phrase[phraseOffset++] = residueNo + 1;
+ if (phraseOffset == fromRatio)
+ {
+ /*
+ * Have read a whole codon (or protein residue), now translate: map
+ * source phrase to positions in target sequence add characters at
+ * these positions to newseq Note mapping positions are base 1, our
+ * sequence positions base 0.
+ */
+ SearchResults sr = new SearchResults();
+ for (int pos : phrase)
+ {
+ mapping.markMappedRegion(seq, pos, sr);
+ }
+ newseq.append(sr.toString());
+ if (first)
+ {
+ first = false;
+ // Hack: Copy sequence dataset, name and description from
+ // SearchResults.match[0].sequence
+ // TODO? carry over sequence names from original 'complement'
+ // alignment
+ SequenceI mappedTo = sr.getResultSequence(0);
+ alignedSeq.setName(mappedTo.getName());
+ alignedSeq.setDescription(mappedTo.getDescription());
+ alignedSeq.setDatasetSequence(mappedTo);
+ }
+ phraseOffset = 0;
+ }
+ residueNo++;
+ }
+ }
+ alignedSeq.setSequence(newseq.toString());
+ return alignedSeq;
+ }
}