X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=b65096c224f947e86c10e59e8a5f24d7b50ebd46;hb=00918171094ad58563bd0b82e18ecf19537ba132;hp=7b867ac294b9bd85ff5f46f5a363bfc006618a34;hpb=87a85be7fc7678455c298287349b03fdd12fd3ad;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 7b867ac..b65096c 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -650,15 +650,16 @@ public class AlignmentUtils int toOffset = alignTo.getStart() - 1; int sourceGapMappedLength = 0; boolean inExon = false; - final char[] thisSeq = alignTo.getSequence(); - final char[] thatAligned = alignFrom.getSequence(); - StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length); + final int toLength = alignTo.getLength(); + final int fromLength = alignFrom.getLength(); + StringBuilder thisAligned = new StringBuilder(2 * toLength); /* * Traverse the 'model' aligned sequence */ - for (char sourceChar : thatAligned) + for (int i = 0; i < fromLength; i++) { + char sourceChar = alignFrom.getCharAt(i); if (sourceChar == sourceGap) { sourceGapMappedLength += ratio; @@ -698,9 +699,9 @@ public class AlignmentUtils */ int intronLength = 0; while (basesWritten + toOffset < mappedCodonEnd - && thisSeqPos < thisSeq.length) + && thisSeqPos < toLength) { - final char c = thisSeq[thisSeqPos++]; + final char c = alignTo.getCharAt(thisSeqPos++); if (c != myGapChar) { basesWritten++; @@ -726,7 +727,7 @@ public class AlignmentUtils int gapsToAdd = calculateGapsToInsert(preserveMappedGaps, preserveUnmappedGaps, sourceGapMappedLength, inExon, trailingCopiedGap.length(), intronLength, startOfCodon); - for (int i = 0; i < gapsToAdd; i++) + for (int k = 0; k < gapsToAdd; k++) { thisAligned.append(myGapChar); } @@ -754,9 +755,9 @@ public class AlignmentUtils * At end of model aligned sequence. Copy any remaining target sequence, optionally * including (intron) gaps. */ - while (thisSeqPos < thisSeq.length) + while (thisSeqPos < toLength) { - final char c = thisSeq[thisSeqPos++]; + final char c = alignTo.getCharAt(thisSeqPos++); if (c != myGapChar || preserveUnmappedGaps) { thisAligned.append(c); @@ -946,7 +947,7 @@ public class AlignmentUtils SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein); if (peptide != null) { - int peptideLength = peptide.getLength(); + final int peptideLength = peptide.getLength(); Mapping map = mapping.getMappingBetween(cdsSeq, peptide); if (map != null) { @@ -955,7 +956,7 @@ public class AlignmentUtils { mapList = mapList.getInverse(); } - int cdsLength = cdsDss.getLength(); + final int cdsLength = cdsDss.getLength(); int mappedFromLength = MappingUtils.getLength(mapList .getFromRanges()); int mappedToLength = MappingUtils @@ -983,14 +984,15 @@ public class AlignmentUtils * walk over the aligned peptide sequence and insert mapped * codons for residues in the aligned cds sequence */ - char[] alignedPeptide = peptide.getSequence(); - char[] nucleotides = cdsDss.getSequence(); int copiedBases = 0; int cdsStart = cdsDss.getStart(); int proteinPos = peptide.getStart() - 1; int cdsCol = 0; - for (char residue : alignedPeptide) + + for (int col = 0; col < peptideLength; col++) { + char residue = peptide.getCharAt(col); + if (Comparison.isGap(residue)) { cdsCol += CODON_LENGTH; @@ -1008,7 +1010,7 @@ public class AlignmentUtils { for (int j = codon[0]; j <= codon[1]; j++) { - char mappedBase = nucleotides[j - cdsStart]; + char mappedBase = cdsDss.getCharAt(j - cdsStart); alignedCds[cdsCol++] = mappedBase; copiedBases++; } @@ -1020,7 +1022,7 @@ public class AlignmentUtils * append stop codon if not mapped from protein, * closing it up to the end of the mapped sequence */ - if (copiedBases == nucleotides.length - CODON_LENGTH) + if (copiedBases == cdsLength - CODON_LENGTH) { for (int i = alignedCds.length - 1; i >= 0; i--) { @@ -1030,9 +1032,9 @@ public class AlignmentUtils break; } } - for (int i = nucleotides.length - CODON_LENGTH; i < nucleotides.length; i++) + for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++) { - alignedCds[cdsCol++] = nucleotides[i]; + alignedCds[cdsCol++] = cdsDss.getCharAt(i); } } cdsSeq.setSequence(new String(alignedCds)); @@ -1202,21 +1204,26 @@ public class AlignmentUtils List unmappedProtein) { /* - * Prefill aligned sequences with gaps before inserting aligned protein - * residues. + * prefill peptide sequences with gaps */ int alignedWidth = alignedCodons.size(); char[] gaps = new char[alignedWidth]; Arrays.fill(gaps, protein.getGapCharacter()); - String allGaps = String.valueOf(gaps); + Map peptides = new HashMap<>(); for (SequenceI seq : protein.getSequences()) { if (!unmappedProtein.contains(seq)) { - seq.setSequence(allGaps); + peptides.put(seq, Arrays.copyOf(gaps, gaps.length)); } } + /* + * Traverse the codons left to right (as defined by CodonComparator) + * and insert peptides in each column where the sequence is mapped. + * This gives a peptide 'alignment' where residues are aligned if their + * corresponding codons occupy the same columns in the cdna alignment. + */ int column = 0; for (AlignedCodon codon : alignedCodons.keySet()) { @@ -1224,12 +1231,20 @@ public class AlignmentUtils .get(codon); for (Entry entry : columnResidues.entrySet()) { - // place translated codon at its column position in sequence - entry.getKey().getSequence()[column] = entry.getValue().product - .charAt(0); + char residue = entry.getValue().product.charAt(0); + peptides.get(entry.getKey())[column] = residue; } column++; } + + /* + * and finally set the constructed sequences + */ + for (Entry entry : peptides.entrySet()) + { + entry.getKey().setSequence(new String(entry.getValue())); + } + return 0; } @@ -2134,7 +2149,7 @@ public class AlignmentUtils int newBegin = Math.min(mappedTo[0], mappedTo[1]); int newEnd = Math.max(mappedTo[0], mappedTo[1]); SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, - sf.getFeatureGroup()); + sf.getFeatureGroup(), sf.getScore()); copyTo.addSequenceFeature(copy); count++; } @@ -2312,24 +2327,6 @@ public class AlignmentUtils count += computePeptideVariants(peptide, peptidePos, codonVariants); } - /* - * sort to get sequence features in start position order - * - would be better to store in Sequence as a TreeSet or NCList? - */ - // if (peptide.getSequenceFeatures() != null) - // { - // Arrays.sort(peptide.getSequenceFeatures(), - // new Comparator() - // { - // @Override - // public int compare(SequenceFeature o1, SequenceFeature o2) - // { - // int c = Integer.compare(o1.getBegin(), o2.getBegin()); - // return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) - // : c; - // } - // }); - // } return count; } @@ -2457,10 +2454,9 @@ public class AlignmentUtils String trans3Char = StringUtils .toSentenceCase(ResidueProperties.aa2Triplet.get(trans)); String desc = "p." + residue3Char + peptidePos + trans3Char; - // set score to 0f so 'graduated colour' option is offered! JAL-2060 SequenceFeature sf = new SequenceFeature( SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos, - peptidePos, 0f, var.getSource()); + peptidePos, var.getSource()); StringBuilder attributes = new StringBuilder(32); String id = (String) var.variant.getValue(ID); if (id != null) @@ -2893,9 +2889,7 @@ public class AlignmentUtils .getInverse()); } - char[] fromChars = fromSeq.getSequence(); int toStart = seq.getStart(); - char[] toChars = seq.getSequence(); /* * traverse [start, end, start, end...] ranges in fromSeq @@ -2926,10 +2920,10 @@ public class AlignmentUtils * of the next character of the mapped-to sequence; stop when all * the characters of the range have been counted */ - while (mappedCharPos <= range[1] && fromCol <= fromChars.length + while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength() && fromCol >= 0) { - if (!Comparison.isGap(fromChars[fromCol - 1])) + if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1))) { /* * mapped from sequence has a character in this column @@ -2941,7 +2935,7 @@ public class AlignmentUtils seqsMap = new HashMap(); map.put(fromCol, seqsMap); } - seqsMap.put(seq, toChars[mappedCharPos - toStart]); + seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart)); mappedCharPos++; } fromCol += (forward ? 1 : -1);