X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=f9d4c081a59a16c0d9a176fe155020bfc09491c9;hb=27f5f6fdfea52770ebd513c4607a6dde87821d48;hp=7dec4ec40a399d7f9e7f5320a2c6e6a96104474f;hpb=e29e164306d88242fe615973eeebc8c4a49d1bb0;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 7dec4ec..f9d4c08 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -20,19 +20,6 @@ */ package jalview.analysis; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.TreeMap; - import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; @@ -51,6 +38,20 @@ import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.util.MappingUtils; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; + /** * grab bag of useful alignment manipulation operations Expect these to be * refactored elsewhere at some point. @@ -76,18 +77,22 @@ public class AlignmentUtils for (SequenceI s : core.getSequences()) { SequenceI newSeq = s.deriveSequence(); - if (newSeq.getStart() > maxoffset + final int newSeqStart = newSeq.getStart() - 1; + if (newSeqStart > maxoffset && newSeq.getDatasetSequence().getStart() < s.getStart()) { - maxoffset = newSeq.getStart(); + maxoffset = newSeqStart; } sq.add(newSeq); } if (flankSize > -1) { - maxoffset = flankSize; + maxoffset = Math.min(maxoffset, flankSize); } - // now add offset to create a new expanded alignment + + /* + * now add offset left and right to create an expanded alignment + */ for (SequenceI s : sq) { SequenceI ds = s; @@ -97,8 +102,8 @@ public class AlignmentUtils } int s_end = s.findPosition(s.getStart() + s.getLength()); // find available flanking residues for sequence - int ustream_ds = s.getStart() - ds.getStart(), dstream_ds = ds - .getEnd() - s_end; + int ustream_ds = s.getStart() - ds.getStart(); + int dstream_ds = ds.getEnd() - s_end; // build new flanked sequence @@ -114,27 +119,27 @@ public class AlignmentUtils offset = maxoffset - flankSize; ustream_ds = flankSize; } - if (flankSize < dstream_ds) + if (flankSize <= dstream_ds) { - dstream_ds = flankSize; + dstream_ds = flankSize - 1; } } + // TODO use Character.toLowerCase to avoid creating String objects? char[] upstream = new String(ds.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1)).toLowerCase().toCharArray(); - char[] downstream = new String(ds.getSequence(s_end - 1, s_end + 1 + char[] downstream = new String(ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase().toCharArray(); char[] coreseq = s.getSequence(); char[] nseq = new char[offset + upstream.length + downstream.length + coreseq.length]; char c = core.getGapCharacter(); - // TODO could lowercase the flanking regions + int p = 0; for (; p < offset; p++) { nseq[p] = c; } - // s.setSequence(new String(upstream).toLowerCase()+new String(coreseq) + - // new String(downstream).toLowerCase()); + System.arraycopy(upstream, 0, nseq, p, upstream.length); System.arraycopy(coreseq, 0, nseq, p + upstream.length, coreseq.length); @@ -152,6 +157,7 @@ public class AlignmentUtils { for (AlignmentAnnotation aa : s.getAnnotation()) { + aa.adjustForAlignment(); // JAL-1712 fix newAl.addAnnotation(aa); } } @@ -442,6 +448,11 @@ public class AlignmentUtils protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart, char[] aaSeqChars) { + if (cdnaSeqChars == null || aaSeqChars == null) + { + return false; + } + int aaResidue = 0; for (int i = cdnaStart; i < cdnaSeqChars.length - 2 && aaResidue < aaSeqChars.length; i += 3, aaResidue++) @@ -856,7 +867,7 @@ public class AlignmentUtils { mapping.markMappedRegion(seq, pos, sr); } - newseq.append(sr.toString()); + newseq.append(sr.getCharacters()); if (first) { first = false; @@ -1010,6 +1021,11 @@ public class AlignmentUtils */ public static boolean isMappable(AlignmentI al1, AlignmentI al2) { + if (al1 == null || al2 == null) + { + return false; + } + /* * Require one nucleotide and one protein */ @@ -1042,9 +1058,15 @@ public class AlignmentUtils * @param mappings * @return */ - public static boolean isMappable(SequenceI dnaSeq, SequenceI proteinSeq, + protected static boolean isMappable(SequenceI dnaSeq, + SequenceI proteinSeq, Set mappings) { + if (dnaSeq == null || proteinSeq == null) + { + return false; + } + SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq : dnaSeq.getDatasetSequence(); SequenceI proteinDs = proteinSeq.getDatasetSequence() == null ? proteinSeq : proteinSeq.getDatasetSequence(); @@ -1235,7 +1257,7 @@ public class AlignmentUtils /** * Returns true if seq1 has a cross-reference to seq2. Currently this assumes - * that sequence name is structured as Source|AccessId. + * that sequence name is structured as Source|AccessionId. * * @param seq1 * @param seq2 @@ -1278,7 +1300,7 @@ public class AlignmentUtils public static AlignmentI makeExonAlignment(SequenceI[] dna, Set mappings) { - Set newMappings = new HashSet(); + Set newMappings = new LinkedHashSet(); List exonSequences = new ArrayList(); for (SequenceI dnaSeq : dna) @@ -1346,8 +1368,8 @@ public class AlignmentUtils /* * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } */ - List exonRanges = seqMapping.getMap().getFromRanges(); - for (int[] range : exonRanges) + final List dnaExonRanges = seqMapping.getMap().getFromRanges(); + for (int[] range : dnaExonRanges) { for (int pos = range[0]; pos <= range[1]; pos++) { @@ -1389,6 +1411,8 @@ public class AlignmentUtils .getToRanges(), 3, 1); newMapping.addMap(exon.getDatasetSequence(), seqMapping.getTo(), map); + MapList cdsToDnaMap = new MapList(dnaExonRanges, exonRange, 1, 1); + newMapping.addMap(dnaSeq, exon.getDatasetSequence(), cdsToDnaMap); exonSequences.add(exon); }