*/
package jalview.analysis;
-import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-
import jalview.commands.RemoveGapColCommand;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.SequenceFeatures;
-import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
import jalview.util.IntRangeComparator;
import jalview.util.MapList;
import jalview.util.MappingUtils;
-import jalview.util.StringUtils;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
return false;
}
String name = seq2.getName();
- final DBRefEntry[] xrefs = seq1.getDBRefs();
+ final List<DBRefEntry> xrefs = seq1.getDBRefs();
if (xrefs != null)
{
- for (DBRefEntry xref : xrefs)
+ for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)
{
+ DBRefEntry xref = xrefs.get(ix);
String xrefName = xref.getSource() + "|" + xref.getAccessionId();
// case-insensitive test, consistent with DBRefEntry.equalRef()
if (xrefName.equalsIgnoreCase(name))
cdsSeqs.add(cdsSeq);
- if (!dataset.getSequences().contains(cdsSeqDss))
- {
- // check if this sequence is a newly created one
- // so needs adding to the dataset
- dataset.addSequence(cdsSeqDss);
- }
-
/*
- * add a mapping from CDS to the (unchanged) mapped to range
+ * build the mapping from CDS to protein
*/
List<int[]> cdsRange = Collections
.singletonList(new int[]
MapList cdsToProteinMap = new MapList(cdsRange,
mapList.getToRanges(), mapList.getFromRatio(),
mapList.getToRatio());
- AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
- cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
- cdsToProteinMap);
- /*
- * guard against duplicating the mapping if repeating this action
- */
- if (!mappings.contains(cdsToProteinMapping))
+ if (!dataset.getSequences().contains(cdsSeqDss))
{
- mappings.add(cdsToProteinMapping);
+ /*
+ * if this sequence is a newly created one, add it to the dataset
+ * and made a CDS to protein mapping (if sequence already exists,
+ * CDS-to-protein mapping _is_ the transcript-to-protein mapping)
+ */
+ dataset.addSequence(cdsSeqDss);
+ AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+ cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+ cdsToProteinMap);
+
+ /*
+ * guard against duplicating the mapping if repeating this action
+ */
+ if (!mappings.contains(cdsToProteinMapping))
+ {
+ mappings.add(cdsToProteinMapping);
+ }
}
propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
// need to
// synthesize an xref.
- for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
+ List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();
+ for (int ip = 0, np = primrefs.size(); ip < np; ip++)
{
+ DBRefEntry primRef = primrefs.get(ip);
/*
* create a cross-reference from CDS to the source sequence's
* primary reference and vice versa
dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
-
// problem here is that the cross-reference is synthesized -
// cdsSeq.getName() may be like 'CDS|dnaaccession' or
// 'CDS|emblcdsacc'
.getInverse()));
proteinProduct.addDBRef(proteinToCdsRef);
}
-
/*
* transfer any features on dna that overlap the CDS
*/
List<DBRefEntry> direct = new ArrayList<>();
HashSet<String> directSources = new HashSet<>();
- if (contig.getDBRefs() != null)
+ List<DBRefEntry> refs = contig.getDBRefs();
+ if (refs != null)
{
- for (DBRefEntry dbr : contig.getDBRefs())
+ for (int ib = 0, nb = refs.size(); ib < nb; ib++)
{
- if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())
+ DBRefEntry dbr = refs.get(ib);
+ MapList map;
+ if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())
{
- MapList map = dbr.getMap().getMap();
// check if map is the CDS mapping
if (mapping.getMap().equals(map))
{
}
}
}
- DBRefEntry[] onSource = DBRefUtils.selectRefs(
+ List<DBRefEntry> onSource = DBRefUtils.selectRefs(
proteinProduct.getDBRefs(),
directSources.toArray(new String[0]));
List<DBRefEntry> propagated = new ArrayList<>();
// and generate appropriate mappings
- for (DBRefEntry cdsref : direct)
+ for (int ic = 0, nc = direct.size(); ic < nc; ic++)
{
+ DBRefEntry cdsref = direct.get(ic);
+ Mapping m = cdsref.getMap();
// clone maplist and mapping
MapList cdsposmap = new MapList(
Arrays.asList(new int[][]
{ new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),
- cdsref.getMap().getMap().getToRanges(), 3, 1);
- Mapping cdsmap = new Mapping(cdsref.getMap().getTo(),
- cdsref.getMap().getMap());
+ m.getMap().getToRanges(), 3, 1);
+ Mapping cdsmap = new Mapping(m.getTo(), m.getMap());
// create dbref
DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
int phase = 0;
try
{
- phase = Integer.parseInt(sf.getPhase());
+ String s = sf.getPhase();
+ if (s != null)
+ {
+ phase = Integer.parseInt(s);
+ }
} catch (NumberFormatException e)
{
- // ignore
+ // leave as zero
}
/*
* phase > 0 on first codon means 5' incomplete - skip to the start
}
/**
- * Helper method that adds a peptide variant feature. ID and
- * clinical_significance attributes of the dna variant (if present) are copied
- * to the new feature.
- *
- * @param peptide
- * @param peptidePos
- * @param residue
- * @param var
- * @param codon
- * the variant codon e.g. aCg
- * @param canonical
- * the 'normal' codon e.g. aTg
- * @return true if a feature was added, else false
- */
- static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
- String residue, DnaVariant var, String codon, String canonical)
- {
- /*
- * get peptide translation of codon e.g. GAT -> D
- * note that variants which are not single alleles,
- * e.g. multibase variants or HGMD_MUTATION etc
- * are currently ignored here
- */
- String trans = codon.contains("-") ? null
- : (codon.length() > CODON_LENGTH ? null
- : ResidueProperties.codonTranslate(codon));
- if (trans == null)
- {
- return false;
- }
- String desc = canonical + "/" + codon;
- String featureType = "";
- if (trans.equals(residue))
- {
- featureType = SequenceOntologyI.SYNONYMOUS_VARIANT;
- }
- else if (ResidueProperties.STOP.equals(trans))
- {
- featureType = SequenceOntologyI.STOP_GAINED;
- }
- else
- {
- String residue3Char = StringUtils
- .toSentenceCase(ResidueProperties.aa2Triplet.get(residue));
- String trans3Char = StringUtils
- .toSentenceCase(ResidueProperties.aa2Triplet.get(trans));
- desc = "p." + residue3Char + peptidePos + trans3Char;
- featureType = SequenceOntologyI.NONSYNONYMOUS_VARIANT;
- }
- SequenceFeature sf = new SequenceFeature(featureType, desc, peptidePos,
- peptidePos, var.getSource());
-
- StringBuilder attributes = new StringBuilder(32);
- String id = (String) var.variant.getValue(VARIANT_ID);
- if (id != null)
- {
- if (id.startsWith(SEQUENCE_VARIANT))
- {
- id = id.substring(SEQUENCE_VARIANT.length());
- }
- sf.setValue(VARIANT_ID, id);
- attributes.append(VARIANT_ID).append("=").append(id);
- // TODO handle other species variants JAL-2064
- StringBuilder link = new StringBuilder(32);
- try
- {
- link.append(desc).append(" ").append(id).append(
- "|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")
- .append(URLEncoder.encode(id, "UTF-8"));
- sf.addLink(link.toString());
- } catch (UnsupportedEncodingException e)
- {
- // as if
- }
- }
- String clinSig = (String) var.variant.getValue(CLINICAL_SIGNIFICANCE);
- if (clinSig != null)
- {
- sf.setValue(CLINICAL_SIGNIFICANCE, clinSig);
- attributes.append(";").append(CLINICAL_SIGNIFICANCE).append("=")
- .append(clinSig);
- }
- peptide.addSequenceFeature(sf);
- if (attributes.length() > 0)
- {
- sf.setAttributes(attributes.toString());
- }
- return true;
- }
-
- /**
* Makes an alignment with a copy of the given sequences, adding in any
* non-redundant sequences which are mapped to by the cross-referenced
* sequences.
SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
if (xrefs != null)
{
- for (SequenceI xref : xrefs)
+ // BH 2019.01.25 recoded to remove iterators
+
+ for (int ix = 0, nx = xrefs.length; ix < nx; ix++)
{
- DBRefEntry[] dbrefs = xref.getDBRefs();
+ SequenceI xref = xrefs[ix];
+ List<DBRefEntry> dbrefs = xref.getDBRefs();
if (dbrefs != null)
{
- for (DBRefEntry dbref : dbrefs)
+ for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)
{
- if (dbref.getMap() == null || dbref.getMap().getTo() == null
- || dbref.getMap().getTo().isProtein() != isProtein)
+ DBRefEntry dbref = dbrefs.get(ir);
+ Mapping map = dbref.getMap();
+ SequenceI mto;
+ if (map == null || (mto = map.getTo()) == null
+ || mto.isProtein() != isProtein)
{
continue;
}
- SequenceI mappedTo = dbref.getMap().getTo();
+ SequenceI mappedTo = mto;
SequenceI match = matcher.findIdMatch(mappedTo);
if (match == null)
{
{
List<SequenceI> alignedSequences = alignedDatasets
.get(seq.getDatasetSequence());
+ if (alignedSequences.isEmpty())
+ {
+ /*
+ * defensive check - shouldn't happen! (JAL-3536)
+ */
+ continue;
+ }
SequenceI alignedSeq = alignedSequences.get(0);
/*