X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=2ef63d408652e7a47dd94ba1665c768086504d2b;hb=61f1a8b75ea5ce352d6214c34fbdcd58bafbbb73;hp=0e7d73611d63d57642c9dca23ef55d447cbb35e1;hpb=316032bb49f563f07b3cf23f9d97bfceeda21961;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 0e7d736..2ef63d4 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -20,7 +20,9 @@ */ package jalview.analysis; +import jalview.api.AlignViewportI; import jalview.bin.Cache; +import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -42,11 +44,66 @@ import jalview.util.ShiftList; import java.util.ArrayList; import java.util.Arrays; -import java.util.Hashtable; +import java.util.Comparator; import java.util.List; +import java.util.Map; public class Dna { + private static final String STOP_X = "X"; + + private static final Comparator comparator = new CodonComparator(); + + /* + * 'final' variables describe the inputs to the translation, which should not + * be modified. + */ + final private List selection; + + final private String[] seqstring; + + final private int[] contigs; + + final private char gapChar; + + final private AlignmentAnnotation[] annotations; + + final private int dnaWidth; + + final private Alignment dataset; + + /* + * Working variables for the translation. + * + * The width of the translation-in-progress protein alignment. + */ + private int aaWidth = 0; + + /* + * This array will be built up so that position i holds the codon positions + * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation. + * Note this implies a contract that if two codons do not align exactly, their + * translated products must occupy different column positions. + */ + private AlignedCodon[] alignedCodons; + + /** + * Constructor given a viewport and the visible contigs. + * + * @param viewport + * @param visibleContigs + */ + public Dna(AlignViewportI viewport, int[] visibleContigs) + { + this.selection = Arrays.asList(viewport.getSequenceSelection()); + this.seqstring = viewport.getViewAsString(true); + this.contigs = visibleContigs; + this.gapChar = viewport.getGapCharacter(); + this.annotations = viewport.getAlignment().getAlignmentAnnotation(); + this.dnaWidth = viewport.getAlignment().getWidth(); + this.dataset = viewport.getAlignment().getDataset(); + } + /** * Test whether codon positions cdp1 should align before, with, or after cdp2. * Returns zero if all positions match (or either argument is null). Returns @@ -60,19 +117,31 @@ public class Dna *
  • compareCodonPos([3,4,5], [2,5,6]) also returns -1
  • * * - * @param cdp1 - * @param cdp2 + * @param ac1 + * @param ac2 + * @return + */ + public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2) + { + return comparator.compare(ac1, ac2); + // return jalview_2_8_2compare(ac1, ac2); + } + + /** + * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent + * - see http://issues.jalview.org/browse/JAL-1635 + * + * @param ac1 + * @param ac2 * @return */ - public static int compareCodonPos(int[] cdp1, int[] cdp2) + private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2) { - if (cdp1 == null - || cdp2 == null - || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) + if (ac1 == null || ac2 == null || (ac1.equals(ac2))) { return 0; } - if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) + if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3) { // one base in cdp1 precedes the corresponding base in the other codon return -1; @@ -82,80 +151,22 @@ public class Dna } /** - * DNA->mapped protein sequence alignment translation given set of sequences - * 1. id distinct coding regions within selected region for each sequence 2. - * generate peptides based on inframe (or given) translation or (optionally - * and where specified) out of frame translations (annotated appropriately) 3. - * align peptides based on codon alignment - */ - /** - * id potential products from dna 1. search for distinct products within - * selected region for each selected sequence 2. group by associated DB type. - * 3. return as form for input into above function - */ - /** - * - */ - /** - * create a new alignment of protein sequences by an inframe translation of - * the provided NA sequences - * - * @param selection - * @param seqstring - * @param viscontigs - * @param gapCharacter - * @param annotations - * @param aWidth - * @param dataset - * destination dataset for translated sequences and mappings - * @return - */ - public static AlignmentI cdnaTranslate(SequenceI[] selection, - String[] seqstring, int viscontigs[], char gapCharacter, - AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) - { - return cdnaTranslate(Arrays.asList(selection), seqstring, null, - viscontigs, gapCharacter, annotations, aWidth, dataset); - } - - /** * - * @param cdnaseqs - * @param seqstring - * @param product - * - array of DbRefEntry objects from which exon map in seqstring is - * derived - * @param viscontigs - * @param gapCharacter - * @param annotations - * @param aWidth - * @param dataset * @return */ - public static AlignmentI cdnaTranslate(List cdnaseqs, - String[] seqstring, DBRefEntry[] product, int viscontigs[], - char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, - Alignment dataset) + public AlignmentI translateCdna() { - AlignedCodonFrame acf = new AlignedCodonFrame(aWidth); + AlignedCodonFrame acf = new AlignedCodonFrame(); - /* - * This array will be built up so that position i holds the codon positions - * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation. - * Note this implies a contract that if two codons do not align exactly, - * their translated products must occupy different column positions. - */ - int[][] alignedCodons = new int[aWidth][]; + alignedCodons = new AlignedCodon[dnaWidth]; int s; - int sSize = cdnaseqs.size(); + int sSize = selection.size(); List pepseqs = new ArrayList(); for (s = 0; s < sSize; s++) { - SequenceI newseq = translateCodingRegion(cdnaseqs.get(s), - seqstring[s], viscontigs, acf, alignedCodons, pepseqs, - gapCharacter, - false); + SequenceI newseq = translateCodingRegion(selection.get(s), + seqstring[s], acf, pepseqs); if (newseq != null) { @@ -178,7 +189,7 @@ public class Dna al.padGaps(); // link the protein translation to the DNA dataset al.setDataset(dataset); - translateAlignedAnnotations(annotations, al, acf, alignedCodons); + translateAlignedAnnotations(al, acf); al.addCodonFrame(acf); return al; } @@ -238,16 +249,14 @@ public class Dna } /** - * Translate na alignment annotations onto translated amino acid alignment al - * using codon mapping codons + * Translate nucleotide alignment annotations onto translated amino acid + * alignment using codon mapping codons * - * @param annotations * @param al - * @param acf + * the translated protein alignment */ - protected static void translateAlignedAnnotations( - AlignmentAnnotation[] annotations, AlignmentI al, - AlignedCodonFrame acf, int[][] codons) + protected void translateAlignedAnnotations(AlignmentI al, + AlignedCodonFrame acf) { // Can only do this for columns with consecutive codons, or where // annotation is sequence associated. @@ -268,7 +277,7 @@ public class Dna continue; } - int aSize = acf.getaaWidth(); // aa alignment width. + int aSize = aaWidth; Annotation[] anots = (annotation.annotations == null) ? null : new Annotation[aSize]; if (anots != null) @@ -276,10 +285,10 @@ public class Dna for (int a = 0; a < aSize; a++) { // process through codon map. - if (a < codons.length && codons[a] != null - && codons[a][0] == (codons[a][2] - 2)) + if (a < alignedCodons.length && alignedCodons[a] != null + && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2)) { - anots[a] = getCodonAnnotation(codons[a], + anots[a] = getCodonAnnotation(alignedCodons[a], annotation.annotations); } } @@ -320,26 +329,27 @@ public class Dna } } - private static Annotation getCodonAnnotation(int[] is, + private static Annotation getCodonAnnotation(AlignedCodon is, Annotation[] annotations) { // Have a look at all the codon positions for annotation and put the first // one found into the translated annotation pos. int contrib = 0; Annotation annot = null; - for (int p = 0; p < 3; p++) + for (int p = 1; p <= 3; p++) { - if (annotations[is[p]] != null) + int dnaCol = is.getBaseColumn(p); + if (annotations[dnaCol] != null) { if (annot == null) { - annot = new Annotation(annotations[is[p]]); + annot = new Annotation(annotations[dnaCol]); contrib = 1; } else { // merge with last - Annotation cpy = new Annotation(annotations[is[p]]); + Annotation cpy = new Annotation(annotations[dnaCol]); if (annot.colour == null) { annot.colour = cpy.colour; @@ -375,49 +385,45 @@ public class Dna * sequence displayed under viscontigs visible columns * @param seqstring * ORF read in some global alignment reference frame - * @param viscontigs - * mapping from global reference frame to visible seqstring ORF read * @param acf * Definition of global ORF alignment reference frame - * @param alignedCodons * @param proteinSeqs - * @param gapCharacter - * @param starForStop - * when true stop codons will translate as '*', otherwise as 'X' * @return sequence ready to be added to alignment. */ - protected static SequenceI translateCodingRegion(SequenceI selection, - String seqstring, int[] viscontigs, AlignedCodonFrame acf, - int[][] alignedCodons, List proteinSeqs, - char gapCharacter, - final boolean starForStop) + protected SequenceI translateCodingRegion(SequenceI selection, + String seqstring, AlignedCodonFrame acf, + List proteinSeqs) { List skip = new ArrayList(); int skipint[] = null; ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring // intervals - int vc, scontigs[] = new int[viscontigs.length]; + int vc; + int[] scontigs = new int[contigs.length]; int npos = 0; - for (vc = 0; vc < viscontigs.length; vc += 2) + for (vc = 0; vc < contigs.length; vc += 2) { if (vc == 0) { - vismapping.addShift(npos, viscontigs[vc]); + vismapping.addShift(npos, contigs[vc]); } else { // hidden region - vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1); + vismapping.addShift(npos, contigs[vc] - contigs[vc - 1] + 1); } - scontigs[vc] = viscontigs[vc]; - scontigs[vc + 1] = viscontigs[vc + 1]; + scontigs[vc] = contigs[vc]; + scontigs[vc + 1] = contigs[vc + 1]; } // allocate a roughly sized buffer for the protein sequence StringBuilder protein = new StringBuilder(seqstring.length() / 2); String seq = seqstring.replace('U', 'T').replace('u', 'T'); char codon[] = new char[3]; - int cdp[] = new int[3], rf = 0, lastnpos = 0, nend; + int cdp[] = new int[3]; + int rf = 0; + int lastnpos = 0; + int nend; int aspos = 0; int resSize = 0; for (npos = 0, nend = seq.length(); npos < nend; npos++) @@ -432,18 +438,19 @@ public class Dna /* * Filled up a reading frame... */ + AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]); String aa = ResidueProperties.codonTranslate(new String(codon)); rf = 0; - final String gapString = String.valueOf(gapCharacter); + final String gapString = String.valueOf(gapChar); if (aa == null) { aa = gapString; if (skipint == null) { skipint = new int[] - { cdp[0], cdp[2] }; + { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */}; } - skipint[1] = cdp[2]; + skipint[1] = alignedCodon.pos3; // cdp[2]; } else { @@ -538,31 +545,19 @@ public class Dna } if (aa.equals("STOP")) { - aa = starForStop ? "*" : "X"; + aa = STOP_X; } resSize++; } - // insert gaps prior to this codon - if necessary boolean findpos = true; while (findpos) { - // expand the codons array if necessary - alignedCodons = checkCodonFrameWidth(alignedCodons, aspos); - /* * Compare this codon's base positions with those currently aligned to * this column in the translation. */ - final int compareCodonPos = Dna.compareCodonPos(cdp, + final int compareCodonPos = compareCodonPos(alignedCodon, alignedCodons[aspos]); - // debug - System.out.println(seq + "/" + aa + " codons: " - + Arrays.deepToString(alignedCodons)); - System.out - .println(("Compare " + Arrays.toString(cdp) + " at pos " - + aspos + " with " - + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos)); - // end debug switch (compareCodonPos) { case -1: @@ -571,7 +566,7 @@ public class Dna * This codon should precede the mapped positions - need to insert a * gap in all prior sequences. */ - insertAAGap(aspos, gapCharacter, alignedCodons, proteinSeqs); + insertAAGap(aspos, proteinSeqs); findpos = false; break; @@ -598,21 +593,18 @@ public class Dna if (alignedCodons[aspos] == null) { // mark this column as aligning to this aligned reading frame - alignedCodons[aspos] = new int[] - { cdp[0], cdp[1], cdp[2] }; + alignedCodons[aspos] = alignedCodon; } - else if (!Arrays.equals(alignedCodons[aspos], cdp)) + else if (!alignedCodons[aspos].equals(alignedCodon)) { throw new IllegalStateException("Tried to coalign " - + Arrays.asList(alignedCodons[aspos], cdp)); + + alignedCodons[aspos].toString() + " with " + + alignedCodon.toString()); } - if (aspos >= acf.aaWidth) + if (aspos >= aaWidth) { // update maximum alignment width - // (we can do this without calling checkCodonFrameWidth because it was - // already done above) - System.out.println("aspos " + aspos + " >= " + acf.aaWidth); - acf.setAaWidth(aspos); + aaWidth = aspos; } // ready for next translated reading frame alignment position (if any) aspos++; @@ -727,53 +719,47 @@ public class Dna * Insert a gap into the aligned proteins and the codon mapping array. * * @param pos - * @param gapCharacter - * @param alignedCodons * @param proteinSeqs + * @return */ - protected static void insertAAGap(int pos, char gapCharacter, - int[][] alignedCodons, List proteinSeqs) + protected void insertAAGap(int pos, + List proteinSeqs) { - System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size()); - // aaWidth++; + aaWidth++; for (SequenceI seq : proteinSeqs) { - seq.insertCharAt(pos, gapCharacter); + seq.insertCharAt(pos, gapChar); } - // if (pos < aaWidth) - // { - // aaWidth++; - System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1, - alignedCodons.length - pos - 1); - alignedCodons[pos] = null; // clear so new codon position can be marked. - // } + checkCodonFrameWidth(); + if (pos < aaWidth) + { + aaWidth++; + + /* + * Shift from [pos] to the end one to the right, and null out [pos] + */ + System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1, + alignedCodons.length - pos - 1); + alignedCodons[pos] = null; + } } /** - * Check the codons array is big enough to accommodate the given position, if - * not resize it. - * - * @param alignedCodons - * @param aspos - * @return the resized array (or the original if no resize needed) + * Check the codons array can accommodate a single insertion, if not resize + * it. */ - protected static int[][] checkCodonFrameWidth(int[][] alignedCodons, - int aspos) + protected void checkCodonFrameWidth() { - // TODO why not codons.length < aspos ? - // should codons expand if length is 2 or 3 and aslen==2 ? - if (alignedCodons.length <= aspos + 1) + if (alignedCodons[alignedCodons.length - 1] != null) { - // probably never have to do this ? - int[][] c = new int[alignedCodons.length + 10][]; - for (int i = 0; i < alignedCodons.length; i++) - { - c[i] = alignedCodons[i]; - } - return c; + /* + * arraycopy insertion would bump a filled slot off the end, so expand. + */ + AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10]; + System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length); + alignedCodons = c; } - return alignedCodons; } /** @@ -786,15 +772,16 @@ public class Dna * @param pep * @param map * @param featureTypes - * hash who's keys are the displayed feature type strings + * hash whose keys are the displayed feature type strings * @param featureGroups * hash where keys are feature groups and values are Boolean objects * indicating if they are displayed. */ private static void transferCodedFeatures(SequenceI dna, SequenceI pep, - MapList map, Hashtable featureTypes, Hashtable featureGroups) + MapList map, Map featureTypes, + Map featureGroups) { - SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna + SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna .getDatasetSequence() : dna).getSequenceFeatures(); Boolean fgstate; DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(), @@ -810,16 +797,16 @@ public class Dna } } } - if (sf != null) + if (sfs != null) { - for (int f = 0; f < sf.length; f++) + for (SequenceFeature sf : sfs) { - fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups - .get(sf[f].featureGroup)); - if ((featureTypes == null || featureTypes.containsKey(sf[f] - .getType())) && (fgstate == null || fgstate.booleanValue())) + fgstate = (featureGroups == null) ? null : featureGroups + .get(sf.featureGroup); + if ((featureTypes == null || featureTypes.containsKey(sf.getType())) + && (fgstate == null || fgstate.booleanValue())) { - if (FeatureProperties.isCodingFeature(null, sf[f].getType())) + if (FeatureProperties.isCodingFeature(null, sf.getType())) { // if (map.intersectsFrom(sf[f].begin, sf[f].end)) {