+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.datamodel;
-import jalview.io.gff.Gff3Helper;
-import jalview.schemes.ResidueProperties;
-import jalview.util.MappingUtils;
-import jalview.util.StringUtils;
+import java.util.Locale;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import jalview.io.gff.Gff3Helper;
+import jalview.schemes.ResidueProperties;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+import jalview.util.StringUtils;
+
/**
* A data bean to hold a list of mapped sequence features (e.g. CDS features
* mapped from protein), and the mapping between the sequences. It also provides
*/
public class MappedFeatures
{
+ /*
+ * VEP CSQ:HGVSp (if present) is a short-cut to the protein variant consequence
+ */
private static final String HGV_SP = "HGVSp";
private static final String CSQ = "CSQ";
/*
- * the mapping from one sequence to another
+ * the sequence the mapped features are on
*/
- public final Mapping mapping;
+ private final SequenceI featureSequence;
- /**
- * the sequence mapped from
+ /*
+ * the mapping between sequences;
+ * NB this could be in either sense (from or to featureSequence)
*/
- public final SequenceI fromSeq;
+ private final Mapping mapping;
/*
- * features on the sequence mapped to that overlap the mapped positions
+ * features on featureSequence that overlap the mapped positions
*/
public final List<SequenceFeature> features;
* Constructor
*
* @param theMapping
- * @param from
- * the sequence mapped from (e.g. CDS)
+ * sequence mapping (which may be either to, or from, the sequence
+ * holding the linked features)
+ * @param featureSeq
+ * the sequence hosting the virtual features
* @param pos
- * the residue position in the sequence mapped to
+ * the residue position in the sequence mapped to
* @param res
- * the residue character at position pos
+ * the residue character at position pos
* @param theFeatures
- * list of mapped features found in the 'from' sequence at
- * the mapped position(s)
+ * list of mapped features found in the 'featureSeq' sequence at the
+ * mapped position(s)
*/
- public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
+ public MappedFeatures(Mapping theMapping, SequenceI featureSeq, int pos,
char res, List<SequenceFeature> theFeatures)
{
mapping = theMapping;
- fromSeq = from;
+ featureSequence = featureSeq;
toPosition = pos;
toResidue = res;
features = theFeatures;
* for a peptide-to-CDS mapping
*/
int[] codonIntervals = mapping.getMap().locateInFrom(toPosition, toPosition);
- if (codonIntervals != null)
+ int[] codonPositions = codonIntervals == null ? null
+ : MappingUtils.flattenRanges(codonIntervals);
+ if (codonPositions != null && codonPositions.length == 3)
{
- codonPos = MappingUtils.flattenRanges(codonIntervals);
- if (codonPos.length == 3)
- {
- baseCodon = new char[3];
- int cdsStart = fromSeq.getStart();
- baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
- baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
- baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
- }
- else
- {
- baseCodon = null;
- }
+ codonPos = codonPositions;
+ baseCodon = new char[3];
+ int cdsStart = featureSequence.getStart();
+ baseCodon[0] = Character
+ .toUpperCase(featureSequence.getCharAt(codonPos[0] - cdsStart));
+ baseCodon[1] = Character
+ .toUpperCase(featureSequence.getCharAt(codonPos[1] - cdsStart));
+ baseCodon[2] = Character
+ .toUpperCase(featureSequence.getCharAt(codonPos[2] - cdsStart));
}
else
{
codonPos = null;
- baseCodon = null; // todo tidy!
+ baseCodon = null;
}
}
/**
* Computes and returns comma-delimited HGVS notation peptide variants derived
* from codon allele variants. If no variants are found, answers an empty
- * string.
+ * string. The peptide variant is either simply read from the "CSQ:HGVSp"
+ * attribute if present, else computed based on the "alleles" attribute if
+ * present. If neither attribute is found, no variant (empty string) is
+ * returned.
*
* @param sf
- * a sequence feature (which must be one of those held in this
- * object)
+ * a sequence feature (which must be one of those held in this
+ * object)
* @return
*/
public String findProteinVariants(SequenceFeature sf)
if (colonPos >= 0)
{
String var = hgvsp.substring(colonPos + 1);
- return var;
+ if (var.contains("p.")) // sanity check
+ {
+ return var;
+ }
}
}
/*
* otherwise, compute codon and peptide variant
*/
- // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
int cdsPos = sf.getBegin();
if (cdsPos != sf.getEnd())
{
* e.g. C,G,T gives variants G and T for base C
*/
Set<String> variantPeptides = new HashSet<>();
- String[] alleles = alls.toUpperCase().split(",");
+ String[] alleles = alls.toUpperCase(Locale.ROOT).split(",");
StringBuilder vars = new StringBuilder();
for (String allele : alleles)
{
- allele = allele.trim().toUpperCase();
+ allele = allele.trim().toUpperCase(Locale.ROOT);
if (allele.length() > 1 || "-".equals(allele))
{
continue; // multi-locus variant
*/
final int i = cdsPos == codonPos[0] ? 0
: (cdsPos == codonPos[1] ? 1 : 2);
- variantCodon[i] = allele.toUpperCase().charAt(0);
+ variantCodon[i] = allele.toUpperCase(Locale.ROOT).charAt(0);
if (variantCodon[i] == baseCodon[i])
{
continue;
return vars.toString();
}
+
+ /**
+ * Answers the name of the linked sequence holding any mapped features
+ *
+ * @return
+ */
+ public String getLinkedSequenceName()
+ {
+ return featureSequence == null ? null : featureSequence.getName();
+ }
+
+ /**
+ * Answers the mapped ranges (as one or more [start, end] positions) which
+ * correspond to the given [begin, end] range of the linked sequence.
+ *
+ * <pre>
+ * Example: MappedFeatures with CDS features mapped to peptide
+ * CDS/200-220 gtc aac TGa acGt att AAC tta
+ * mapped to PEP/6-7 WN by mapping [206, 207, 210, 210, 215, 217] to [6, 7]
+ * getMappedPositions(206, 206) should return [6, 6]
+ * getMappedPositions(200, 214) should return [6, 6]
+ * getMappedPositions(210, 215) should return [6, 7]
+ * </pre>
+ *
+ * @param begin
+ * @param end
+ * @return
+ */
+ public int[] getMappedPositions(int begin, int end)
+ {
+ MapList map = mapping.getMap();
+ return mapping.to == featureSequence ? map.getOverlapsInFrom(begin, end)
+ : map.getOverlapsInTo(begin, end);
+ }
+
+ /**
+ * Answers true if the linked features are on coding sequence, false if on
+ * peptide
+ *
+ * @return
+ */
+ public boolean isFromCds()
+ {
+ if (mapping.getMap().getFromRatio() == 3)
+ {
+ return mapping.to != featureSequence;
+ }
+ return mapping.to == featureSequence;
+ }
}