X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=2946ba2416922a157fdcebf25164c07f9b77c24a;hb=80634498762666e6acc92368716bf1a4d4f42f7b;hp=aa1a98b98dd4a91d7f2811e89da76b9997b29a09;hpb=241bd0223b016b5ad5ec78520310a8de32842722;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index aa1a98b..2946ba2 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -74,12 +74,15 @@ import java.util.TreeMap; */ public class AlignmentUtils { - private static final int CODON_LENGTH = 3; private static final String SEQUENCE_VARIANT = "sequence_variant:"; - private static final String ID = "ID"; + /* + * the 'id' attribute is provided for variant features fetched from + * Ensembl using its REST service with JSON format + */ + public static final String VARIANT_ID = "id"; /** * A data model to hold the 'normal' base value at a position, and an optional @@ -1599,11 +1602,12 @@ public class AlignmentUtils return false; } String name = seq2.getName(); - final DBRefEntry[] xrefs = seq1.getDBRefs(); + final List xrefs = seq1.getDBRefs(); if (xrefs != null) { - for (DBRefEntry xref : xrefs) + for (int ix = 0, nx = xrefs.size(); ix < nx; ix++) { + DBRefEntry xref = xrefs.get(ix); String xrefName = xref.getSource() + "|" + xref.getAccessionId(); // case-insensitive test, consistent with DBRefEntry.equalRef() if (xrefName.equalsIgnoreCase(name)) @@ -1799,8 +1803,10 @@ public class AlignmentUtils // need to // synthesize an xref. - for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs()) + List primrefs = dnaDss.getPrimaryDBRefs(); + for (int ip = 0, np = primrefs.size(); ip < np; ip++) { + DBRefEntry primRef = primrefs.get(ip); /* * create a cross-reference from CDS to the source sequence's * primary reference and vice versa @@ -1814,7 +1820,6 @@ public class AlignmentUtils dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq .getName(), new Mapping(cdsSeqDss, dnaToCdsMap))); - // problem here is that the cross-reference is synthesized - // cdsSeq.getName() may be like 'CDS|dnaaccession' or // 'CDS|emblcdsacc' @@ -1827,7 +1832,6 @@ public class AlignmentUtils .getInverse())); proteinProduct.addDBRef(proteinToCdsRef); } - /* * transfer any features on dna that overlap the CDS */ @@ -1888,7 +1892,7 @@ public class AlignmentUtils * @param seqMappings * the set of mappings involving dnaSeq * @param aMapping - * an initial candidate from seqMappings + * a transcript-to-peptide mapping * @return */ static SequenceI findCdsForProtein(List mappings, @@ -1913,7 +1917,15 @@ public class AlignmentUtils if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - CODON_LENGTH) { - return seqDss; + /* + * if sequence has CDS features, this is a transcript with no UTR + * - do not take this as the CDS sequence! (JAL-2789) + */ + if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS) + .isEmpty()) + { + return seqDss; + } } /* @@ -1938,10 +1950,12 @@ public class AlignmentUtils { /* * found a 3:1 mapping to the protein product which covers - * the whole dna sequence i.e. is from CDS; finally check it - * is from the dna start sequence + * the whole dna sequence i.e. is from CDS; finally check the CDS + * is mapped from the given dna start sequence */ SequenceI cdsSeq = map.getFromSeq(); + // todo this test is weak if seqMappings contains multiple mappings; + // we get away with it if transcript:cds relationship is 1:1 List dnaToCdsMaps = MappingUtils .findMappingsForSequence(cdsSeq, seqMappings); if (!dnaToCdsMaps.isEmpty()) @@ -2056,16 +2070,20 @@ public class AlignmentUtils protected static List propagateDBRefsToCDS(SequenceI cdsSeq, SequenceI contig, SequenceI proteinProduct, Mapping mapping) { + // gather direct refs from contig congruent with mapping List direct = new ArrayList<>(); HashSet directSources = new HashSet<>(); - if (contig.getDBRefs() != null) + + List refs = contig.getDBRefs(); + if (refs != null) { - for (DBRefEntry dbr : contig.getDBRefs()) + for (int ib = 0, nb = refs.size(); ib < nb; ib++) { - if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap()) + DBRefEntry dbr = refs.get(ib); + MapList map; + if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap()) { - MapList map = dbr.getMap().getMap(); // check if map is the CDS mapping if (mapping.getMap().equals(map)) { @@ -2075,21 +2093,22 @@ public class AlignmentUtils } } } - DBRefEntry[] onSource = DBRefUtils.selectRefs( + List onSource = DBRefUtils.selectRefs( proteinProduct.getDBRefs(), directSources.toArray(new String[0])); List propagated = new ArrayList<>(); // and generate appropriate mappings - for (DBRefEntry cdsref : direct) + for (int ic = 0, nc = direct.size(); ic < nc; ic++) { + DBRefEntry cdsref = direct.get(ic); + Mapping m = cdsref.getMap(); // clone maplist and mapping MapList cdsposmap = new MapList( Arrays.asList(new int[][] { new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }), - cdsref.getMap().getMap().getToRanges(), 3, 1); - Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), - cdsref.getMap().getMap()); + m.getMap().getToRanges(), 3, 1); + Mapping cdsmap = new Mapping(m.getTo(),m.getMap()); // create dbref DBRefEntry newref = new DBRefEntry(cdsref.getSource(), @@ -2235,12 +2254,13 @@ public class AlignmentUtils int mappedDnaLength = MappingUtils.getLength(ranges); /* - * if not a whole number of codons, something is wrong, - * abort mapping + * if not a whole number of codons, truncate mapping */ - if (mappedDnaLength % CODON_LENGTH > 0) + int codonRemainder = mappedDnaLength % CODON_LENGTH; + if (codonRemainder > 0) { - return null; + mappedDnaLength -= codonRemainder; + MappingUtils.removeEndPositions(codonRemainder, ranges); } int proteinLength = proteinSeq.getLength(); @@ -2307,10 +2327,14 @@ public class AlignmentUtils int phase = 0; try { - phase = Integer.parseInt(sf.getPhase()); + String s = sf.getPhase(); + if (s != null) + { + phase = Integer.parseInt(s); + } } catch (NumberFormatException e) { - // ignore + // SwingJS -- need to avoid these. } /* * phase > 0 on first codon means 5' incomplete - skip to the start @@ -2421,20 +2445,23 @@ public class AlignmentUtils /* * variants in first codon base */ - for (DnaVariant var : codonVariants[0]) + for (DnaVariant dnavar : codonVariants[0]) { - if (var.variant != null) + if (dnavar.variant != null) { - String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); + String alleles = (String) dnavar.variant.getValue(Gff3Helper.ALLELES); if (alleles != null) { for (String base : alleles.split(",")) { - if (!base1.equals(base)) + if (!base1.equalsIgnoreCase(base)) { - String codon = base + base2 + base3; - if (addPeptideVariant(peptide, peptidePos, residue, var, - codon)) + String codon = base.toUpperCase() + base2.toLowerCase() + + base3.toLowerCase(); + String canonical = base1.toUpperCase() + base2.toLowerCase() + + base3.toLowerCase(); + if (addPeptideVariant(peptide, peptidePos, residue, dnavar, + codon, canonical)) { count++; } @@ -2456,11 +2483,14 @@ public class AlignmentUtils { for (String base : alleles.split(",")) { - if (!base2.equals(base)) + if (!base2.equalsIgnoreCase(base)) { - String codon = base1 + base + base3; + String codon = base1.toLowerCase() + base.toUpperCase() + + base3.toLowerCase(); + String canonical = base1.toLowerCase() + base2.toUpperCase() + + base3.toLowerCase(); if (addPeptideVariant(peptide, peptidePos, residue, var, - codon)) + codon, canonical)) { count++; } @@ -2482,11 +2512,14 @@ public class AlignmentUtils { for (String base : alleles.split(",")) { - if (!base3.equals(base)) + if (!base3.equalsIgnoreCase(base)) { - String codon = base1 + base2 + base; + String codon = base1.toLowerCase() + base2.toLowerCase() + + base.toUpperCase(); + String canonical = base1.toLowerCase() + base2.toLowerCase() + + base3.toUpperCase(); if (addPeptideVariant(peptide, peptidePos, residue, var, - codon)) + codon, canonical)) { count++; } @@ -2509,10 +2542,13 @@ public class AlignmentUtils * @param residue * @param var * @param codon + * the variant codon e.g. aCg + * @param canonical + * the 'normal' codon e.g. aTg * @return true if a feature was added, else false */ static boolean addPeptideVariant(SequenceI peptide, int peptidePos, - String residue, DnaVariant var, String codon) + String residue, DnaVariant var, String codon, String canonical) { /* * get peptide translation of codon e.g. GAT -> D @@ -2527,7 +2563,7 @@ public class AlignmentUtils { return false; } - String desc = codon; + String desc = canonical + "/" + codon; String featureType = ""; if (trans.equals(residue)) { @@ -2550,15 +2586,15 @@ public class AlignmentUtils peptidePos, var.getSource()); StringBuilder attributes = new StringBuilder(32); - String id = (String) var.variant.getValue(ID); + String id = (String) var.variant.getValue(VARIANT_ID); if (id != null) { if (id.startsWith(SEQUENCE_VARIANT)) { id = id.substring(SEQUENCE_VARIANT.length()); } - sf.setValue(ID, id); - attributes.append(ID).append("=").append(id); + sf.setValue(VARIANT_ID, id); + attributes.append(VARIANT_ID).append("=").append(id); // TODO handle other species variants JAL-2064 StringBuilder link = new StringBuilder(32); try @@ -2738,19 +2774,25 @@ public class AlignmentUtils SequenceIdMatcher matcher = new SequenceIdMatcher(seqs); if (xrefs != null) { - for (SequenceI xref : xrefs) + // BH 2019.01.25 streamlined this triply nested loop to remove all iterators + + for (int ix = 0, nx = xrefs.length; ix < nx; ix++) { - DBRefEntry[] dbrefs = xref.getDBRefs(); + SequenceI xref = xrefs[ix]; + List dbrefs = xref.getDBRefs(); if (dbrefs != null) { - for (DBRefEntry dbref : dbrefs) + for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++) { - if (dbref.getMap() == null || dbref.getMap().getTo() == null - || dbref.getMap().getTo().isProtein() != isProtein) + DBRefEntry dbref = dbrefs.get(ir); + Mapping map = dbref.getMap(); + SequenceI mto; + if (map == null || (mto = map.getTo()) == null + || mto.isProtein() != isProtein) { continue; } - SequenceI mappedTo = dbref.getMap().getTo(); + SequenceI mappedTo = mto; SequenceI match = matcher.findIdMatch(mappedTo); if (match == null) {