X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=a2b047212b5ea4991cac7adfa145d22b415b08dc;hb=d043ce47fc710d3eb2629ba926a8a7417bd67d8c;hp=810290bffd090cc00398f19fc2c5a39c972586ba;hpb=838e4f91d4a53dd315640dbc9ff6ef7a815ee576;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 810290b..a2b0472 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b1) - * Copyright (C) 2015 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -20,6 +20,12 @@ */ package jalview.analysis; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + import jalview.api.AlignViewportI; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; @@ -28,7 +34,6 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.FeatureProperties; import jalview.datamodel.GraphLine; import jalview.datamodel.Mapping; @@ -41,12 +46,6 @@ import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.util.ShiftList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.Map; - public class Dna { private static final String STOP_ASTERIX = "*"; @@ -57,19 +56,23 @@ public class Dna * 'final' variables describe the inputs to the translation, which should not * be modified. */ - final private List selection; + private final List selection; + + private final String[] seqstring; + + private final Iterator contigs; - final private String[] seqstring; + private final char gapChar; - final private int[] contigs; + private final AlignmentAnnotation[] annotations; - final private char gapChar; + private final int dnaWidth; - final private AlignmentAnnotation[] annotations; + private final AlignmentI dataset; - final private int dnaWidth; + private ShiftList vismapping; - final private Alignment dataset; + private int[] startcontigs; /* * Working variables for the translation. @@ -92,7 +95,7 @@ public class Dna * @param viewport * @param visibleContigs */ - public Dna(AlignViewportI viewport, int[] visibleContigs) + public Dna(AlignViewportI viewport, Iterator visibleContigs) { this.selection = Arrays.asList(viewport.getSequenceSelection()); this.seqstring = viewport.getViewAsString(true); @@ -101,6 +104,45 @@ public class Dna this.annotations = viewport.getAlignment().getAlignmentAnnotation(); this.dnaWidth = viewport.getAlignment().getWidth(); this.dataset = viewport.getAlignment().getDataset(); + initContigs(); + } + + /** + * Initialise contigs used as starting point for translateCodingRegion + */ + private void initContigs() + { + vismapping = new ShiftList(); // map from viscontigs to seqstring + // intervals + + int npos = 0; + int[] lastregion = null; + ArrayList tempcontigs = new ArrayList<>(); + while (contigs.hasNext()) + { + int[] region = contigs.next(); + if (lastregion == null) + { + vismapping.addShift(npos, region[0]); + } + else + { + // hidden region + vismapping.addShift(npos, region[0] - lastregion[1] + 1); + } + lastregion = region; + tempcontigs.add(region[0]); + tempcontigs.add(region[1]); + } + + startcontigs = new int[tempcontigs.size()]; + int i = 0; + for (Integer val : tempcontigs) + { + startcontigs[i] = val; + i++; + } + tempcontigs = null; } /** @@ -120,7 +162,8 @@ public class Dna * @param ac2 * @return */ - public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2) + public static final int compareCodonPos(AlignedCodon ac1, + AlignedCodon ac2) { return comparator.compare(ac1, ac2); // return jalview_2_8_2compare(ac1, ac2); @@ -134,7 +177,8 @@ public class Dna * @param ac2 * @return */ - private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2) + private static int jalview_2_8_2compare(AlignedCodon ac1, + AlignedCodon ac2) { if (ac1 == null || ac2 == null || (ac1.equals(ac2))) { @@ -150,10 +194,11 @@ public class Dna } /** + * Translates cDNA using the specified code table * * @return */ - public AlignmentI translateCdna() + public AlignmentI translateCdna(GeneticCodeI codeTable) { AlignedCodonFrame acf = new AlignedCodonFrame(); @@ -161,11 +206,11 @@ public class Dna int s; int sSize = selection.size(); - List pepseqs = new ArrayList(); + List pepseqs = new ArrayList<>(); for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection.get(s), - seqstring[s], acf, pepseqs); + seqstring[s], acf, pepseqs, codeTable); if (newseq != null) { @@ -208,29 +253,30 @@ public class Dna for (int gd = 0; gd < selection.length; gd++) { SequenceI dna = selection[gd]; - DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(), + List dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(), jalview.datamodel.DBRefSource.DNACODINGDBS); if (dnarefs != null) { // intersect with pep - List mappedrefs = new ArrayList(); - DBRefEntry[] refs = dna.getDBRef(); - for (int d = 0; d < refs.length; d++) + List mappedrefs = new ArrayList<>(); + List refs = dna.getDBRefs(); + for (int d = 0, nd = refs.size(); d < nd; d++) { - if (refs[d].getMap() != null && refs[d].getMap().getMap() != null - && refs[d].getMap().getMap().getFromRatio() == 3 - && refs[d].getMap().getMap().getToRatio() == 1) + DBRefEntry ref = refs.get(d); + if (ref.getMap() != null && ref.getMap().getMap() != null + && ref.getMap().getMap().getFromRatio() == 3 + && ref.getMap().getMap().getToRatio() == 1) { - mappedrefs.add(refs[d]); // add translated protein maps + mappedrefs.add(ref); // add translated protein maps } } - dnarefs = mappedrefs.toArray(new DBRefEntry[mappedrefs.size()]); - for (int d = 0; d < dnarefs.length; d++) + dnarefs = mappedrefs;// .toArray(new DBRefEntry[mappedrefs.size()]); + for (int d = 0, nd = dnarefs.size(); d < nd; d++) { - Mapping mp = dnarefs[d].getMap(); + Mapping mp = dnarefs.get(d).getMap(); if (mp != null) { - for (int vc = 0; vc < viscontigs.length; vc += 2) + for (int vc = 0, nv = viscontigs.length; vc < nv; vc += 2) { int[] mpr = mp.locateMappedRange(viscontigs[vc], viscontigs[vc + 1]); @@ -385,33 +431,21 @@ public class Dna * @param acf * Definition of global ORF alignment reference frame * @param proteinSeqs + * @param codeTable * @return sequence ready to be added to alignment. */ protected SequenceI translateCodingRegion(SequenceI selection, String seqstring, AlignedCodonFrame acf, - List proteinSeqs) + List proteinSeqs, GeneticCodeI codeTable) { - List skip = new ArrayList(); - int skipint[] = null; - ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring - // intervals - int vc; - int[] scontigs = new int[contigs.length]; + List skip = new ArrayList<>(); + int[] skipint = null; + int npos = 0; - for (vc = 0; vc < contigs.length; vc += 2) - { - if (vc == 0) - { - vismapping.addShift(npos, contigs[vc]); - } - else - { - // hidden region - vismapping.addShift(npos, contigs[vc] - contigs[vc - 1] + 1); - } - scontigs[vc] = contigs[vc]; - scontigs[vc + 1] = contigs[vc + 1]; - } + int vc = 0; + + int[] scontigs = new int[startcontigs.length]; + System.arraycopy(startcontigs, 0, scontigs, 0, startcontigs.length); // allocate a roughly sized buffer for the protein sequence StringBuilder protein = new StringBuilder(seqstring.length() / 2); @@ -435,8 +469,9 @@ public class Dna /* * Filled up a reading frame... */ - AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]); - String aa = ResidueProperties.codonTranslate(new String(codon)); + AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], + cdp[2]); + String aa = codeTable.translate(new String(codon)); rf = 0; final String gapString = String.valueOf(gapChar); if (aa == null) @@ -444,10 +479,11 @@ public class Dna aa = gapString; if (skipint == null) { - skipint = new int[] { alignedCodon.pos1, alignedCodon.pos3 /* - * cdp[0], - * cdp[2] - */}; + skipint = new int[] { alignedCodon.pos1, + alignedCodon.pos3 /* + * cdp[0], + * cdp[2] + */ }; } skipint[1] = alignedCodon.pos3; // cdp[2]; } @@ -502,8 +538,8 @@ public class Dna } if (vc + 2 < t.length) { - System.arraycopy(scontigs, vc + 2, t, vc, t.length - - vc + 2); + System.arraycopy(scontigs, vc + 2, t, vc, + t.length - vc + 2); } scontigs = t; } @@ -542,7 +578,7 @@ public class Dna skip.add(skipint); skipint = null; } - if (aa.equals("STOP")) + if (aa.equals(ResidueProperties.STOP)) { aa = STOP_ASTERIX; } @@ -596,9 +632,9 @@ public class Dna } else if (!alignedCodons[aspos].equals(alignedCodon)) { - throw new IllegalStateException("Tried to coalign " - + alignedCodons[aspos].toString() + " with " - + alignedCodon.toString()); + throw new IllegalStateException( + "Tried to coalign " + alignedCodons[aspos].toString() + + " with " + alignedCodon.toString()); } if (aspos >= aaWidth) { @@ -685,7 +721,7 @@ public class Dna */ MapList map = new MapList(scontigs, new int[] { 1, resSize }, 3, 1); - transferCodedFeatures(selection, newseq, map, null, null); + transferCodedFeatures(selection, newseq, map); /* * Construct a dataset sequence for our new peptide. @@ -754,56 +790,215 @@ public class Dna /** * Given a peptide newly translated from a dna sequence, copy over and set any - * features on the peptide from the DNA. If featureTypes is null, all features - * on the dna sequence are searched (rather than just the displayed ones), and - * similarly for featureGroups. + * features on the peptide from the DNA. * * @param dna * @param pep * @param map - * @param featureTypes - * hash whose keys are the displayed feature type strings - * @param featureGroups - * hash where keys are feature groups and values are Boolean objects - * indicating if they are displayed. */ private static void transferCodedFeatures(SequenceI dna, SequenceI pep, - MapList map, Map featureTypes, - Map featureGroups) + MapList map) { - SequenceFeature[] sfs = dna.getSequenceFeatures(); - Boolean fgstate; - DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(), - DBRefSource.DNACODINGDBS); - if (dnarefs != null) + // BH 2019.01.25 nop? + // List dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(), + // DBRefSource.DNACODINGDBS); + // if (dnarefs != null) + // { + // // intersect with pep + // for (int d = 0, nd = dnarefs.size(); d < nd; d++) + // { + // Mapping mp = dnarefs.get(d).getMap(); + // if (mp != null) + // { + // } + // } + // } + for (SequenceFeature sf : dna.getFeatures().getAllFeatures()) { - // intersect with pep - for (int d = 0; d < dnarefs.length; d++) + if (FeatureProperties.isCodingFeature(null, sf.getType())) { - Mapping mp = dnarefs[d].getMap(); - if (mp != null) + // if (map.intersectsFrom(sf[f].begin, sf[f].end)) { + } } } - if (sfs != null) + } + + /** + * Returns an alignment consisting of the reversed (and optionally + * complemented) sequences set in this object's constructor + * + * @param complement + * @return + */ + public AlignmentI reverseCdna(boolean complement) + { + int sSize = selection.size(); + List reversed = new ArrayList<>(); + for (int s = 0; s < sSize; s++) { - for (SequenceFeature sf : sfs) + SequenceI newseq = reverseSequence(selection.get(s).getName(), + seqstring[s], complement); + + if (newseq != null) { - fgstate = (featureGroups == null) ? null : featureGroups - .get(sf.featureGroup); - if ((featureTypes == null || featureTypes.containsKey(sf.getType())) - && (fgstate == null || fgstate.booleanValue())) - { - if (FeatureProperties.isCodingFeature(null, sf.getType())) - { - // if (map.intersectsFrom(sf[f].begin, sf[f].end)) - { + reversed.add(newseq); + } + } - } - } - } + SequenceI[] newseqs = reversed.toArray(new SequenceI[reversed.size()]); + AlignmentI al = new Alignment(newseqs); + ((Alignment) al).createDatasetAlignment(); + return al; + } + + /** + * Returns a reversed, and optionally complemented, sequence. The new + * sequence's name is the original name with "|rev" or "|revcomp" appended. + * aAcCgGtT and DNA ambiguity codes are complemented, any other characters are + * left unchanged. + * + * @param seq + * @param complement + * @return + */ + public static SequenceI reverseSequence(String seqName, String sequence, + boolean complement) + { + String newName = seqName + "|rev" + (complement ? "comp" : ""); + char[] originalSequence = sequence.toCharArray(); + int length = originalSequence.length; + char[] reversedSequence = new char[length]; + int bases = 0; + for (int i = 0; i < length; i++) + { + char c = complement ? getComplement(originalSequence[i]) + : originalSequence[i]; + reversedSequence[length - i - 1] = c; + if (!Comparison.isGap(c)) + { + bases++; } } + SequenceI reversed = new Sequence(newName, reversedSequence, 1, bases); + return reversed; + } + + /** + * Answers the reverse complement of the input string + * + * @see #getComplement(char) + * @param s + * @return + */ + public static String reverseComplement(String s) + { + StringBuilder sb = new StringBuilder(s.length()); + for (int i = s.length() - 1; i >= 0; i--) + { + sb.append(Dna.getComplement(s.charAt(i))); + } + return sb.toString(); + } + + /** + * Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes + * are treated as on http://reverse-complement.com/. Anything else is left + * unchanged. + * + * @param c + * @return + */ + public static char getComplement(char c) + { + char result = c; + switch (c) + { + case '-': + case '.': + case ' ': + break; + case 'a': + result = 't'; + break; + case 'A': + result = 'T'; + break; + case 'c': + result = 'g'; + break; + case 'C': + result = 'G'; + break; + case 'g': + result = 'c'; + break; + case 'G': + result = 'C'; + break; + case 't': + result = 'a'; + break; + case 'T': + result = 'A'; + break; + case 'u': + result = 'a'; + break; + case 'U': + result = 'A'; + break; + case 'r': + result = 'y'; + break; + case 'R': + result = 'Y'; + break; + case 'y': + result = 'r'; + break; + case 'Y': + result = 'R'; + break; + case 'k': + result = 'm'; + break; + case 'K': + result = 'M'; + break; + case 'm': + result = 'k'; + break; + case 'M': + result = 'K'; + break; + case 'b': + result = 'v'; + break; + case 'B': + result = 'V'; + break; + case 'v': + result = 'b'; + break; + case 'V': + result = 'B'; + break; + case 'd': + result = 'h'; + break; + case 'D': + result = 'H'; + break; + case 'h': + result = 'd'; + break; + case 'H': + result = 'D'; + break; + } + + return result; } }