+ /**
+  * Helper method that adds a peptide variant feature, provided the given codon
+  * translates to a value different to the current residue (is a non-synonymous
+  * variant). The feature description has the form
+  * "p." + residue + peptidePos + translation, where residue and translation
+  * are the sentence-cased values looked up in ResidueProperties.aa2Triplet.
+  * <p>
+  * The ID and CLINICAL_SIGNIFICANCE values of the dna variant (if present) are
+  * copied to the new feature, both as feature values and as a ';'-separated
+  * list of key=value pairs in the feature's attributes string. A leading
+  * SEQUENCE_VARIANT prefix is stripped from the ID. When an ID is present, a
+  * link to the Ensembl (Homo sapiens) variation summary page is also added.
+  *
+  * @param peptide
+  *          the sequence to which the feature is added
+  * @param peptidePos
+  *          used as both begin and end of the new feature, and in its
+  *          description
+  * @param residue
+  *          the current residue, compared against the codon's translation
+  * @param var
+  *          the dna variant; its variant feature is dereferenced here, so it
+  *          must not be null
+  * @param codon
+  *          the variant codon; a codon containing '-' translates to "-", and
+  *          a codon longer than 3 characters is ignored
+  * @return true if a feature was added, else false
+  */
+ static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
+         String residue, DnaVariant var, String codon)
+ {
+   /*
+    * get peptide translation of codon e.g. GAT -> D
+    * note that variants which are not single alleles,
+    * e.g. multibase variants or HGMD_MUTATION etc
+    * are currently ignored here
+    */
+   String trans = codon.contains("-") ? "-"
+           : (codon.length() > 3 ? null
+                   : ResidueProperties.codonTranslate(codon));
+   if (trans == null || trans.equals(residue))
+   {
+     // no translation available, or a synonymous variant
+     return false;
+   }
+
+   String residue3Char = StringUtils
+           .toSentenceCase(ResidueProperties.aa2Triplet.get(residue));
+   String trans3Char = StringUtils
+           .toSentenceCase(ResidueProperties.aa2Triplet.get(trans));
+   String desc = "p." + residue3Char + peptidePos + trans3Char;
+   // set score to 0f so 'graduated colour' option is offered! JAL-2060
+   SequenceFeature sf = new SequenceFeature(
+           SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
+           peptidePos, 0f, "Jalview");
+   StringBuilder attributes = new StringBuilder(32);
+
+   String id = (String) var.variant.getValue(ID);
+   if (id != null)
+   {
+     if (id.startsWith(SEQUENCE_VARIANT))
+     {
+       id = id.substring(SEQUENCE_VARIANT.length());
+     }
+     sf.setValue(ID, id);
+     attributes.append(ID).append("=").append(id);
+     // TODO handle other species variants
+     StringBuilder link = new StringBuilder(32);
+     try
+     {
+       link.append(desc).append(" ").append(id)
+               .append("|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")
+               .append(URLEncoder.encode(id, "UTF-8"));
+       sf.addLink(link.toString());
+     } catch (UnsupportedEncodingException ignored)
+     {
+       // cannot happen: UTF-8 support is mandatory on every Java platform
+     }
+   }
+
+   String clinSig = (String) var.variant.getValue(CLINICAL_SIGNIFICANCE);
+   if (clinSig != null)
+   {
+     sf.setValue(CLINICAL_SIGNIFICANCE, clinSig);
+     /*
+      * only separate from a preceding attribute; avoids a leading ';'
+      * when the variant has no ID
+      */
+     if (attributes.length() > 0)
+     {
+       attributes.append(";");
+     }
+     attributes.append(CLINICAL_SIGNIFICANCE).append("=").append(clinSig);
+   }
+
+   peptide.addSequenceFeature(sf);
+   if (attributes.length() > 0)
+   {
+     sf.setAttributes(attributes.toString());
+   }
+   return true;
+ }
+
+ /**
+  * Builds a map whose key is position in the protein sequence, and value is a
+  * list of the base and all variants for each corresponding codon position.
+  * <p>
+  * Only features that start and end at the same position, whose type is (a
+  * kind of) SequenceOntologyI.SEQUENCE_VARIANT, which map into the protein via
+  * dnaToProtein, and which carry an "alleles" value are processed. A map entry
+  * is only created for a feature that passes all of these checks, so every
+  * entry holds a base value for each of its three codon positions.
+  *
+  * @param dnaSeq
+  *          the dna sequence whose features are scanned and whose bases are
+  *          recorded
+  * @param dnaToProtein
+  *          mapping used to locate the peptide position of a dna position,
+  *          and the codon positions of a peptide position
+  * @return a map (in order of first insertion) from peptide position to a
+  *         three element array, holding for each codon position the list of
+  *         base / variant entries; empty if the sequence has no features
+  */
+ static LinkedHashMap<Integer, List<DnaVariant>[]> buildDnaVariantsMap(
+         SequenceI dnaSeq, MapList dnaToProtein)
+ {
+   /*
+    * map from peptide position to all variants of the codon which codes for it
+    * LinkedHashMap ensures we keep the peptide features in sequence order
+    */
+   LinkedHashMap<Integer, List<DnaVariant>[]> variants = new LinkedHashMap<Integer, List<DnaVariant>[]>();
+   SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+
+   SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
+   if (dnaFeatures == null)
+   {
+     return variants;
+   }
+
+   int dnaStart = dnaSeq.getStart();
+   int[] lastCodon = null;
+   int lastPeptidePosition = 0;
+
+   /*
+    * build a map of codon variations for peptides
+    */
+   for (SequenceFeature sf : dnaFeatures)
+   {
+     int dnaCol = sf.getBegin();
+     if (dnaCol != sf.getEnd())
+     {
+       // not handling multi-locus variant features
+       continue;
+     }
+     if (!so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
+     {
+       continue;
+     }
+
+     int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
+     if (mapsTo == null)
+     {
+       // feature doesn't lie within coding region
+       continue;
+     }
+
+     /*
+      * skip variants with no alleles _before_ creating a map entry,
+      * so that the map never holds an entry with empty codon lists
+      */
+     String alls = (String) sf.getValue("alleles");
+     if (alls == null)
+     {
+       continue;
+     }
+
+     int peptidePosition = mapsTo[0];
+     List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
+     if (codonVariants == null)
+     {
+       codonVariants = new ArrayList[3];
+       codonVariants[0] = new ArrayList<DnaVariant>();
+       codonVariants[1] = new ArrayList<DnaVariant>();
+       codonVariants[2] = new ArrayList<DnaVariant>();
+       variants.put(peptidePosition, codonVariants);
+     }
+
+     /*
+      * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
+      * (reusing the previous lookup if the peptide position is unchanged)
+      */
+     int[] codon = peptidePosition == lastPeptidePosition ? lastCodon
+             : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
+                     peptidePosition, peptidePosition));
+     lastPeptidePosition = peptidePosition;
+     lastCodon = codon;
+
+     /*
+      * save nucleotide (and any variant) for each codon position
+      */
+     for (int codonPos = 0; codonPos < 3; codonPos++)
+     {
+       String nucleotide = String.valueOf(
+               dnaSeq.getCharAt(codon[codonPos] - dnaStart))
+               .toUpperCase();
+       List<DnaVariant> codonVariant = codonVariants[codonPos];
+       if (codon[codonPos] == dnaCol)
+       {
+         if (!codonVariant.isEmpty()
+                 && codonVariant.get(0).variant == null)
+         {
+           /*
+            * already recorded base value, add this variant
+            */
+           codonVariant.get(0).variant = sf;
+         }
+         else
+         {
+           /*
+            * add variant with base value
+            */
+           codonVariant.add(new DnaVariant(nucleotide, sf));
+         }
+       }
+       else if (codonVariant.isEmpty())
+       {
+         /*
+          * record (possibly non-varying) base value
+          */
+         codonVariant.add(new DnaVariant(nucleotide));
+       }
+     }
+   }
+   return variants;
+ }
+
+ /**
+  * Makes an alignment with a copy of the given sequences, sharing the given
+  * dataset, and then adds each sequence that is the target of a mapping held
+  * on a database reference of a cross-referenced sequence, provided the id
+  * matcher (seeded with the given sequences) finds no match for it.
+  *
+  * @param seqs
+  *          the sequences to copy into the new alignment
+  * @param xrefs
+  *          cross-referenced sequences whose mapped-to sequences may be
+  *          added; may be null
+  * @param dataset
+  *          the alignment dataset shared by the new copy
+  * @return the new alignment
+  */
+ public static AlignmentI makeCopyAlignment(SequenceI[] seqs,
+         SequenceI[] xrefs, AlignmentI dataset)
+ {
+   AlignmentI result = new Alignment(new Alignment(seqs));
+   result.setDataset(dataset);
+
+   SequenceIdMatcher idMatcher = new SequenceIdMatcher(seqs);
+   if (xrefs == null)
+   {
+     return result;
+   }
+
+   for (SequenceI xref : xrefs)
+   {
+     DBRefEntry[] refs = xref.getDBRefs();
+     if (refs == null)
+     {
+       continue;
+     }
+     for (DBRefEntry ref : refs)
+     {
+       boolean hasTarget = ref.getMap() != null
+               && ref.getMap().getTo() != null;
+       if (!hasTarget)
+       {
+         // no mapping, or mapping has no target sequence
+         continue;
+       }
+       SequenceI target = ref.getMap().getTo();
+       if (idMatcher.findIdMatch(target) == null)
+       {
+         /*
+          * not seen before: remember it, and add it to the alignment
+          */
+         idMatcher.add(target);
+         result.addSequence(target);
+       }
+     }
+   }
+   return result;
+ }
+
+ /**
+  * Try to align sequences in 'unaligned' to match the alignment of their
+  * mapped regions in 'aligned'. For example, could use this to align CDS
+  * sequences which are mapped to their parent cDNA sequences.
+  *
+  * This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For
+  * dna-to-protein or protein-to-dna use alternative methods.
+  *
+  * @param unaligned
+  *          sequences to be aligned
+  * @param aligned
+  *          holds aligned sequences and their mappings
+  * @return the height of 'unaligned' if its sequences could simply be copied
+  *         from 'aligned', otherwise the number of sequences realigned via
+  *         mappings
+  */
+ public static int alignAs(AlignmentI unaligned, AlignmentI aligned)
+ {
+   /*
+    * simple case: sequences share dataset sequences with the aligned ones
+    */
+   if (alignAsSameSequences(unaligned, aligned))
+   {
+     return unaligned.getHeight();
+   }
+
+   /*
+    * otherwise work from the mappings between sequences
+    */
+   List<SequenceI> unmapped = new ArrayList<SequenceI>();
+   Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(
+           unaligned, aligned, unmapped);
+   final int width = columnMap.size();
+   final char gap = unaligned.getGapCharacter();
+   int count = 0;
+
+   for (SequenceI seq : unaligned.getSequences())
+   {
+     if (unmapped.contains(seq))
+     {
+       // no mapping found, so can't realign this one
+       continue;
+     }
+
+     char[] residues = new char[width];
+     Arrays.fill(residues, gap);
+
+     /*
+      * walk the columns in map order, filling in those populated
+      * by this sequence and noting the last one filled
+      */
+     int lastFilled = 0;
+     int col = 0;
+     for (Map<SequenceI, Character> columnResidues : columnMap.values())
+     {
+       Character c = columnResidues.get(seq);
+       if (c != null)
+       {
+         residues[col] = c;
+         lastFilled = col;
+       }
+       col++;
+     }
+
+     /*
+      * drop trailing gaps: keep up to and including the last filled
+      * column (or nothing at all if there are no columns)
+      */
+     int keep = lastFilled < width ? lastFilled + 1 : width;
+     seq.setSequence(String.valueOf(residues, 0, keep));
+     count++;
+   }
+   return count;
+ }
+
+ /**
+  * If every sequence in 'unaligned' has the same dataset sequence as some
+  * sequence in 'aligned', sets each unaligned sequence to the sequence string
+  * of its aligned counterpart and returns true. Otherwise leaves the
+  * sequences unchanged and returns false. Also returns false if either
+  * alignment has no dataset.
+  *
+  * @param unaligned
+  *          the alignment whose sequences are to be updated
+  * @param aligned
+  *          the alignment supplying the aligned sequence strings
+  * @return true if the aligned sequences were copied, else false
+  */
+ static boolean alignAsSameSequences(AlignmentI unaligned,
+         AlignmentI aligned)
+ {
+   boolean bothHaveDatasets = aligned.getDataset() != null
+           && unaligned.getDataset() != null;
+   if (!bothHaveDatasets)
+   {
+     return false; // should only pass alignments with datasets here
+   }
+
+   /*
+    * index the aligned sequences by their dataset sequence
+    */
+   Map<SequenceI, SequenceI> alignedByDataset = new HashMap<SequenceI, SequenceI>();
+   for (SequenceI alignedSeq : aligned.getSequences())
+   {
+     alignedByDataset.put(alignedSeq.getDatasetSequence(), alignedSeq);
+   }
+
+   /*
+    * check first, so that nothing is modified unless every sequence
+    * to be aligned has an aligned counterpart
+    */
+   for (SequenceI candidate : unaligned.getSequences())
+   {
+     SequenceI datasetSeq = candidate.getDatasetSequence();
+     if (!alignedByDataset.containsKey(datasetSeq))
+     {
+       return false;
+     }
+   }
+
+   /*
+    * all matched, so now copy the aligned sequence strings across
+    */
+   for (SequenceI target : unaligned.getSequences())
+   {
+     SequenceI source = alignedByDataset.get(target.getDatasetSequence());
+     target.setSequence(source.getSequenceAsString());
+   }
+
+   return true;
+ }
+
+ /**
+ * Returns a map whose key is alignment column number (base 1), and whose
+ * values are a map of sequence characters in that column.
+ *
+ * @param unaligned
+ * @param aligned
+ * @param unmapped
+ * @return
+ */
+ static Map<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(
+ AlignmentI unaligned, AlignmentI aligned, List<SequenceI> unmapped)
+ {
+ /*
+ * Map will hold, for each aligned column position, a map of
+ * {unalignedSequence, characterPerSequence} at that position.
+ * TreeMap keeps the entries in ascending column order.
+ */
+ Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
+
+ /*
+ * record any sequences that have no mapping so can't be realigned
+ */
+ unmapped.addAll(unaligned.getSequences());
+
+ List<AlignedCodonFrame> mappings = aligned.getCodonFrames();
+
+ for (SequenceI seq : unaligned.getSequences())
+ {
+ for (AlignedCodonFrame mapping : mappings)
+ {
+ SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);
+ if (fromSeq != null)
+ {
+ Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);
+ if (addMappedPositions(seq, fromSeq, seqMap, map))
+ {
+ unmapped.remove(seq);
+ }
+ }
+ }
+ }
+ return map;