X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=4bb3fde0affe8c47e19bc76c33c64bb69677838d;hb=57dd16688caa6dacaeaf465bab3ee8b6126e1a51;hp=3af4913dabeadb13dafa4dc9d98962ed37215d15;hpb=134057b8c18c9be3c77d830b96c396885db32c6c;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 3af4913..4bb3fde 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9) + * Copyright (C) 2015 The Jalview Authors * * This file is part of Jalview. * @@ -228,9 +228,8 @@ public class AlignmentUtils * @param cdnaAlignment * @return */ - public static boolean mapProteinToCdna( - final AlignmentI proteinAlignment, - final AlignmentI cdnaAlignment) + public static boolean mapProteinAlignmentToCdna( + final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment) { if (proteinAlignment == null || cdnaAlignment == null) { @@ -276,7 +275,7 @@ public class AlignmentUtils final AlignmentI cdnaAlignment, Set mappedDna, Set mappedProtein, boolean xrefsOnly) { - boolean mappingPerformed = false; + boolean mappingExistsOrAdded = false; List thisSeqs = proteinAlignment.getSequences(); for (SequenceI aaSeq : thisSeqs) { @@ -309,14 +308,18 @@ public class AlignmentUtils { continue; } - if (!mappingExists(proteinAlignment.getCodonFrames(), + if (mappingExists(proteinAlignment.getCodonFrames(), aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence())) { - MapList map = mapProteinToCdna(aaSeq, cdnaSeq); + mappingExistsOrAdded = true; + } + else + { + MapList map = mapProteinSequenceToCdna(aaSeq, cdnaSeq); if (map != null) { acf.addMap(cdnaSeq, aaSeq, map); - mappingPerformed = true; + mappingExistsOrAdded = true; proteinMapped = true; mappedDna.add(cdnaSeq); mappedProtein.add(aaSeq); @@ -328,7 +331,7 @@ public class AlignmentUtils proteinAlignment.addCodonFrame(acf); } } - return mappingPerformed; + return mappingExistsOrAdded; } /** @@ -361,7 +364,7 @@ public class AlignmentUtils * @param cdnaSeq * @return */ - public static MapList mapProteinToCdna(SequenceI proteinSeq, + public static MapList mapProteinSequenceToCdna(SequenceI proteinSeq, SequenceI cdnaSeq) { /* @@ -385,10 +388,10 @@ public class AlignmentUtils */ final int mappedLength = 3 * aaSeqChars.length; int cdnaLength = cdnaSeqChars.length; - int cdnaStart = 1; - int cdnaEnd = cdnaLength; - final int proteinStart = 1; - final int proteinEnd = aaSeqChars.length; + int cdnaStart = cdnaSeq.getStart(); + int cdnaEnd = cdnaSeq.getEnd(); + final int proteinStart = proteinSeq.getStart(); + final int proteinEnd = proteinSeq.getEnd(); /* * If lengths don't match, try ignoring stop codon. @@ -411,12 +414,13 @@ public class AlignmentUtils /* * If lengths still don't match, try ignoring start codon. */ + int startOffset = 0; if (cdnaLength != mappedLength && cdnaLength > 2 && String.valueOf(cdnaSeqChars, 0, 3).toUpperCase() - .equals( - ResidueProperties.START)) + .equals(ResidueProperties.START)) { + startOffset += 3; cdnaStart += 3; cdnaLength -= 3; } @@ -425,13 +429,12 @@ public class AlignmentUtils { return null; } - if (!translatesAs(cdnaSeqChars, cdnaStart - 1, aaSeqChars)) + if (!translatesAs(cdnaSeqChars, startOffset, aaSeqChars)) { return null; } - MapList map = new MapList(new int[] - { cdnaStart, cdnaEnd }, new int[] - { proteinStart, proteinEnd }, 3, 1); + MapList map = new MapList(new int[] { cdnaStart, cdnaEnd }, new int[] { + proteinStart, proteinEnd }, 3, 1); return map; } @@ -458,8 +461,7 @@ public class AlignmentUtils && aaResidue < aaSeqChars.length; i += 3, aaResidue++) { String codon = String.valueOf(cdnaSeqChars, i, 3); - final String translated = ResidueProperties.codonTranslate( - codon); + final String translated = ResidueProperties.codonTranslate(codon); /* * allow * in protein to match untranslatable in dna */ @@ -468,8 +470,7 @@ public class AlignmentUtils { continue; } - if (translated == null - || !(aaRes == translated.charAt(0))) + if (translated == null || !(aaRes == translated.charAt(0))) { // debug // System.out.println(("Mismatch at " + i + "/" + aaResidue + ": " @@ -500,7 +501,8 @@ public class AlignmentUtils boolean preserveUnmappedGaps) { /* - * Get any mappings from the source alignment to the target (dataset) sequence. + * Get any mappings from the source alignment to the target (dataset) + * sequence. */ // TODO there may be one AlignedCodonFrame per dataset sequence, or one with // all mappings. Would it help to constrain this? @@ -509,7 +511,7 @@ public class AlignmentUtils { return false; } - + /* * Locate the aligned source sequence whose dataset sequence is mapped. We * just take the first match here (as we can't align cDNA like more than one @@ -526,7 +528,7 @@ public class AlignmentUtils break; } } - + if (alignFrom == null) { return false; @@ -551,15 +553,15 @@ public class AlignmentUtils * @param preserveMappedGaps */ public static void alignSequenceAs(SequenceI alignTo, - SequenceI alignFrom, - AlignedCodonFrame mapping, String myGap, char sourceGap, - boolean preserveMappedGaps, boolean preserveUnmappedGaps) + SequenceI alignFrom, AlignedCodonFrame mapping, String myGap, + char sourceGap, boolean preserveMappedGaps, + boolean preserveUnmappedGaps) { // TODO generalise to work for Protein-Protein, dna-dna, dna-protein final char[] thisSeq = alignTo.getSequence(); final char[] thatAligned = alignFrom.getSequence(); StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length); - + // aligned and dataset sequence positions, all base zero int thisSeqPos = 0; int sourceDsPos = 0; @@ -571,6 +573,8 @@ public class AlignmentUtils /* * Traverse the aligned protein sequence. */ + int fromOffset = alignFrom.getStart() - 1; + int toOffset = alignTo.getStart() - 1; int sourceGapMappedLength = 0; boolean inExon = false; for (char sourceChar : thatAligned) @@ -587,7 +591,7 @@ public class AlignmentUtils sourceDsPos++; // Note mapping positions are base 1, our sequence positions base 0 int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom, - sourceDsPos); + sourceDsPos + fromOffset); if (mappedPos == null) { /* @@ -611,14 +615,15 @@ public class AlignmentUtils * But then 'align dna as protein' doesn't make much sense otherwise. */ int intronLength = 0; - while (basesWritten < mappedCodonEnd && thisSeqPos < thisSeq.length) + while (basesWritten + toOffset < mappedCodonEnd + && thisSeqPos < thisSeq.length) { final char c = thisSeq[thisSeqPos++]; if (c != myGapChar) { basesWritten++; - - if (basesWritten < mappedCodonStart) + int sourcePosition = basesWritten + toOffset; + if (sourcePosition < mappedCodonStart) { /* * Found an unmapped (intron) base. First add in any preceding gaps @@ -635,7 +640,7 @@ public class AlignmentUtils } else { - final boolean startOfCodon = basesWritten == mappedCodonStart; + final boolean startOfCodon = sourcePosition == mappedCodonStart; int gapsToAdd = calculateGapsToInsert(preserveMappedGaps, preserveUnmappedGaps, sourceGapMappedLength, inExon, trailingCopiedGap.length(), intronLength, startOfCodon); @@ -696,8 +701,8 @@ public class AlignmentUtils */ protected static int calculateGapsToInsert(boolean preserveMappedGaps, boolean preserveUnmappedGaps, int sourceGapMappedLength, - boolean inExon, int trailingGapLength, - int intronLength, final boolean startOfCodon) + boolean inExon, int trailingGapLength, int intronLength, + final boolean startOfCodon) { int gapsToAdd = 0; if (startOfCodon) @@ -821,8 +826,8 @@ public class AlignmentUtils // mapping is from protein to nucleotide toDna = true; // should ideally get gap count ratio from mapping - gap = String.valueOf(new char[] - { gapCharacter, gapCharacter, gapCharacter }); + gap = String.valueOf(new char[] { gapCharacter, gapCharacter, + gapCharacter }); } else { @@ -965,9 +970,9 @@ public class AlignmentUtils int column = 0; for (AlignedCodon codon : alignedCodons.keySet()) { - final Map columnResidues = alignedCodons.get(codon); - for (Entry entry : columnResidues - .entrySet()) + final Map columnResidues = alignedCodons + .get(codon); + for (Entry entry : columnResidues.entrySet()) { // place translated codon at its column position in sequence entry.getKey().getSequence()[column] = entry.getValue().charAt(0); @@ -994,8 +999,7 @@ public class AlignmentUtils * the map we are building up */ static void addCodonPositions(SequenceI dna, SequenceI protein, - char gapChar, - Mapping seqMap, + char gapChar, Mapping seqMap, Map> alignedCodons) { Iterator codons = seqMap.getCodonIterator(dna, gapChar); @@ -1068,23 +1072,25 @@ public class AlignmentUtils * @return */ protected static boolean isMappable(SequenceI dnaSeq, - SequenceI proteinSeq, - Set mappings) + SequenceI proteinSeq, Set mappings) { if (dnaSeq == null || proteinSeq == null) { return false; } - SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq : dnaSeq.getDatasetSequence(); + SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq : dnaSeq + .getDatasetSequence(); SequenceI proteinDs = proteinSeq.getDatasetSequence() == null ? proteinSeq : proteinSeq.getDatasetSequence(); - + /* * Already mapped? */ - for (AlignedCodonFrame mapping : mappings) { - if ( proteinDs == mapping.getAaForDnaSeq(dnaDs)) { + for (AlignedCodonFrame mapping : mappings) + { + if (proteinDs == mapping.getAaForDnaSeq(dnaDs)) + { return true; } } @@ -1093,7 +1099,7 @@ public class AlignmentUtils * Just try to make a mapping (it is not yet stored), test whether * successful. */ - return mapProteinToCdna(proteinDs, dnaDs) != null; + return mapProteinSequenceToCdna(proteinDs, dnaDs) != null; } /** @@ -1112,7 +1118,8 @@ public class AlignmentUtils * the alignment to check for presence of annotations */ public static void findAddableReferenceAnnotations( - List sequenceScope, Map labelForCalcId, + List sequenceScope, + Map labelForCalcId, final Map> candidates, AlignmentI al) { @@ -1120,7 +1127,7 @@ public class AlignmentUtils { return; } - + /* * For each sequence in scope, make a list of any annotations on the * underlying dataset sequence which are not already on the alignment. @@ -1148,8 +1155,7 @@ public class AlignmentUtils * sequence. */ final Iterable matchedAlignmentAnnotations = al - .findAnnotations(seq, dsann.getCalcId(), - dsann.label); + .findAnnotations(seq, dsann.getCalcId(), dsann.label); if (!matchedAlignmentAnnotations.iterator().hasNext()) { result.add(dsann); @@ -1197,7 +1203,7 @@ public class AlignmentUtils endRes = selectionGroup.getEndRes(); } copyAnn.restrict(startRes, endRes); - + /* * Add to the sequence (sets copyAnn.datasetSequence), unless the * original annotation is already on the sequence. @@ -1235,8 +1241,7 @@ public class AlignmentUtils Collection types, List forSequences, boolean anyType, boolean doShow) { - for (AlignmentAnnotation aa : al - .getAlignmentAnnotation()) + for (AlignmentAnnotation aa : al.getAlignmentAnnotation()) { if (anyType || types.contains(aa.label)) { @@ -1311,7 +1316,7 @@ public class AlignmentUtils { Set newMappings = new LinkedHashSet(); List exonSequences = new ArrayList(); - + for (SequenceI dnaSeq : dna) { final SequenceI ds = dnaSeq.getDatasetSequence(); @@ -1414,11 +1419,9 @@ public class AlignmentUtils * contiguous exons */ List exonRange = new ArrayList(); - exonRange.add(new int[] - { 1, newSequence.length() }); + exonRange.add(new int[] { 1, newSequence.length() }); MapList map = new MapList(exonRange, seqMapping.getMap() - .getToRanges(), - 3, 1); + .getToRanges(), 3, 1); newMapping.addMap(exon.getDatasetSequence(), seqMapping.getTo(), map); MapList cdsToDnaMap = new MapList(dnaExonRanges, exonRange, 1, 1); newMapping.addMap(dnaSeq, exon.getDatasetSequence(), cdsToDnaMap);