X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FMappedFeatures.java;h=ca6db1bc3df439decc0d591fd0e86006c738dcbc;hb=08c7bee16c16563cc7cec7ea4d336b3e0c4c937a;hp=0fa03cf0d6c632601da074b2f41e81a51967b9d1;hpb=be38fa8acaa8061b501a4e94fb7dea4b7ed8ed41;p=jalview.git diff --git a/src/jalview/datamodel/MappedFeatures.java b/src/jalview/datamodel/MappedFeatures.java index 0fa03cf..ca6db1b 100644 --- a/src/jalview/datamodel/MappedFeatures.java +++ b/src/jalview/datamodel/MappedFeatures.java @@ -1,14 +1,37 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel; -import jalview.io.gff.Gff3Helper; -import jalview.schemes.ResidueProperties; -import jalview.util.MappingUtils; -import jalview.util.StringUtils; +import java.util.Locale; import java.util.HashSet; import java.util.List; import java.util.Set; +import jalview.io.gff.Gff3Helper; +import jalview.schemes.ResidueProperties; +import jalview.util.MapList; +import jalview.util.MappingUtils; +import jalview.util.StringUtils; + /** * A data bean to hold a list of mapped sequence features (e.g. CDS features * mapped from protein), and the mapping between the sequences. 
It also provides @@ -18,22 +41,26 @@ import java.util.Set; */ public class MappedFeatures { + /* + * VEP CSQ:HGVSp (if present) is a short-cut to the protein variant consequence + */ private static final String HGV_SP = "HGVSp"; private static final String CSQ = "CSQ"; /* - * the mapping from one sequence to another + * the sequence the mapped features are on */ - public final Mapping mapping; + private final SequenceI featureSequence; - /** - * the sequence mapped from + /* + * the mapping between sequences; + * NB this could be in either sense (from or to featureSequence) */ - public final SequenceI fromSeq; + private final Mapping mapping; /* - * features on the sequence mapped to that overlap the mapped positions + * features on featureSequence that overlap the mapped positions */ public final List features; @@ -60,21 +87,23 @@ public class MappedFeatures * Constructor * * @param theMapping - * @param from - * the sequence mapped from (e.g. CDS) + * sequence mapping (which may be either to, or from, the sequence + * holding the linked features) + * @param featureSeq + * the sequence hosting the virtual features * @param pos - * the residue position in the sequence mapped to + * the residue position in the sequence mapped to * @param res - * the residue character at position pos + * the residue character at position pos * @param theFeatures - * list of mapped features found in the 'from' sequence at - * the mapped position(s) + * list of mapped features found in the 'featureSeq' sequence at the + * mapped position(s) */ - public MappedFeatures(Mapping theMapping, SequenceI from, int pos, + public MappedFeatures(Mapping theMapping, SequenceI featureSeq, int pos, char res, List theFeatures) { mapping = theMapping; - fromSeq = from; + featureSequence = featureSeq; toPosition = pos; toResidue = res; features = theFeatures; @@ -90,13 +119,13 @@ public class MappedFeatures { codonPos = codonPositions; baseCodon = new char[3]; - int cdsStart = fromSeq.getStart(); + int 
cdsStart = featureSequence.getStart(); baseCodon[0] = Character - .toUpperCase(fromSeq.getCharAt(codonPos[0] - cdsStart)); + .toUpperCase(featureSequence.getCharAt(codonPos[0] - cdsStart)); baseCodon[1] = Character - .toUpperCase(fromSeq.getCharAt(codonPos[1] - cdsStart)); + .toUpperCase(featureSequence.getCharAt(codonPos[1] - cdsStart)); baseCodon[2] = Character - .toUpperCase(fromSeq.getCharAt(codonPos[2] - cdsStart)); + .toUpperCase(featureSequence.getCharAt(codonPos[2] - cdsStart)); } else { @@ -108,11 +137,14 @@ public class MappedFeatures /** * Computes and returns comma-delimited HGVS notation peptide variants derived * from codon allele variants. If no variants are found, answers an empty - * string. + * string. The peptide variant is either simply read from the "CSQ:HGVSp" + * attribute if present, else computed based on the "alleles" attribute if + * present. If neither attribute is found, no variant (empty string) is + * returned. * * @param sf - * a sequence feature (which must be one of those held in this - * object) + * a sequence feature (which must be one of those held in this + * object) * @return */ public String findProteinVariants(SequenceFeature sf) @@ -169,12 +201,12 @@ public class MappedFeatures * e.g. C,G,T gives variants G and T for base C */ Set variantPeptides = new HashSet<>(); - String[] alleles = alls.toUpperCase().split(","); + String[] alleles = alls.toUpperCase(Locale.ROOT).split(","); StringBuilder vars = new StringBuilder(); for (String allele : alleles) { - allele = allele.trim().toUpperCase(); + allele = allele.trim().toUpperCase(Locale.ROOT); if (allele.length() > 1 || "-".equals(allele)) { continue; // multi-locus variant @@ -190,7 +222,7 @@ public class MappedFeatures */ final int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 
1 : 2); - variantCodon[i] = allele.toUpperCase().charAt(0); + variantCodon[i] = allele.toUpperCase(Locale.ROOT).charAt(0); if (variantCodon[i] == baseCodon[i]) { continue; @@ -233,4 +265,53 @@ public class MappedFeatures return vars.toString(); } + + /** + * Answers the name of the linked sequence holding any mapped features + * + * @return + */ + public String getLinkedSequenceName() + { + return featureSequence == null ? null : featureSequence.getName(); + } + + /** + * Answers the mapped ranges (as one or more [start, end] positions) which + * correspond to the given [begin, end] range of the linked sequence. + * + * <pre>
+   * Example: MappedFeatures with CDS features mapped to peptide 
+   * CDS/200-220 gtc aac TGa acGt att AAC tta
+   * mapped to PEP/6-7 WN by mapping [206, 207, 210, 210, 215, 217] to [6, 7]
+   * getMappedPositions(206, 206) should return [6, 6]
+   * getMappedPositions(200, 214) should return [6, 6]
+   * getMappedPositions(210, 215) should return [6, 7]
+   * </pre>
+ * + * @param begin + * @param end + * @return + */ + public int[] getMappedPositions(int begin, int end) + { + MapList map = mapping.getMap(); + return mapping.to == featureSequence ? map.getOverlapsInFrom(begin, end) + : map.getOverlapsInTo(begin, end); + } + + /** + * Answers true if the linked features are on coding sequence, false if on + * peptide + * + * @return + */ + public boolean isFromCds() + { + if (mapping.getMap().getFromRatio() == 3) + { + return mapping.to != featureSequence; + } + return mapping.to == featureSequence; + } }