X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=681589cda58198d439f61dd8ec59b820af2b8957;hb=5589f9fbe9e7e743e164cb1e287cd3e5a5290a0b;hp=5659d19f5267e1e150ad6c6c605563e259d22e7a;hpb=47168f025aefdaa044802bd5f8f510ffe43a4808;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 5659d19..681589c 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -20,23 +20,32 @@ */ package jalview.analysis; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; +import jalview.schemes.ResidueProperties; +import jalview.util.MapList; + import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; - -import jalview.datamodel.SequenceI; -import jalview.datamodel.AlignmentI; +import java.util.Map; /** - * grab bag of useful alignment manipulation operations - * Expect these to be refactored elsewhere at some point. + * grab bag of useful alignment manipulation operations Expect these to be + * refactored elsewhere at some point. + * * @author jimp - * + * */ public class AlignmentUtils { /** - * given an existing alignment, create a new alignment including all, or up to flankSize additional symbols from each sequence's dataset sequence + * given an existing alignment, create a new alignment including all, or up to + * flankSize additional symbols from each sequence's dataset sequence + * * @param core * @param flankSize * @return AlignmentI @@ -45,68 +54,258 @@ public class AlignmentUtils { List sq = new ArrayList(); int maxoffset = 0; - for (SequenceI s:core.getSequences()) + for (SequenceI s : core.getSequences()) { SequenceI newSeq = s.deriveSequence(); - if (newSeq.getStart()>maxoffset && newSeq.getDatasetSequence().getStart() maxoffset + && newSeq.getDatasetSequence().getStart() < s.getStart()) { maxoffset = newSeq.getStart(); } sq.add(newSeq); } - if (flankSize>-1) { + if (flankSize > -1) + { maxoffset = flankSize; } // now add offset to create a new expanded alignment - for (SequenceI s:sq) + for (SequenceI s : sq) { SequenceI ds = s; - while (ds.getDatasetSequence()!=null) { - ds=ds.getDatasetSequence(); + while (ds.getDatasetSequence() != null) + { + ds = ds.getDatasetSequence(); } - int s_end = s.findPosition(s.getStart()+s.getLength()); + int s_end = s.findPosition(s.getStart() + s.getLength()); // find available flanking residues for sequence - int ustream_ds=s.getStart()-ds.getStart(),dstream_ds=ds.getEnd()-s_end; - + int ustream_ds = s.getStart() - ds.getStart(), dstream_ds = ds + .getEnd() - s_end; + // build new flanked sequence - + // compute gap padding to start of flanking sequence - int offset=maxoffset - ustream_ds; - + int offset = maxoffset - ustream_ds; + // padding is gapChar x ( maxoffset - min(ustream_ds, flank) - if (flankSize>=0) { - if (flankSize= 0) + { + if (flankSize < ustream_ds) { - // take up to flankSize residues - offset = maxoffset-flankSize; - ustream_ds = flankSize; - } - if (flankSize> getSequencesByName( + AlignmentI al) + { + Map> theMap = new LinkedHashMap>(); + for (SequenceI seq : al.getSequences()) + { + String name = seq.getName(); + if (name != null) + { + List seqs = theMap.get(name); + if (seqs == null) + { + seqs = new ArrayList(); + theMap.put(name, seqs); + } + seqs.add(seq); + } + } + return theMap; + } + + /** + * Build mapping of protein to cDNA alignment. Mappings are made between + * sequences which have the same name and compatible lengths. Returns true if + * at least one sequence mapping was made, else false. + * + * @param proteinAlignment + * @param cdnaAlignment + * @return + */ + public static boolean mapProteinToCdna(final AlignmentI proteinAlignment, + final AlignmentI cdnaAlignment) + { + boolean mapped = false; + List thisSeqs = proteinAlignment.getSequences(); + + /* + * Build a look-up of cDNA sequences by name, for matching purposes. + */ + Map> cdnaSeqs = cdnaAlignment + .getSequencesByName(); + + for (SequenceI aaSeq : thisSeqs) + { + AlignedCodonFrame acf = new AlignedCodonFrame( + proteinAlignment.getWidth()); + List candidates = cdnaSeqs.get(aaSeq.getName()); + if (candidates == null) + { + /* + * No cDNA sequence with matching name, so no mapping for this protein + * sequence + */ + continue; + } + for (SequenceI cdnaSeq : candidates) + { + MapList map = mapProteinToCdna(aaSeq, cdnaSeq); + if (map != null) + { + acf.addMap(cdnaSeq, aaSeq, map); + mapped = true; + } + } + proteinAlignment.addCodonFrame(acf); + } + return mapped; + } + + /** + * Build a mapping (if possible) of a protein to a cDNA sequence. The cDNA + * must be three times the length of the protein, possibly after ignoring + * start and/or stop codons. Returns null if no mapping is determined. + * + * @param proteinSeqs + * @param cdnaSeq + * @return + */ + public static MapList mapProteinToCdna(SequenceI proteinSeq, + SequenceI cdnaSeq) + { + String aaSeqString = proteinSeq.getDatasetSequence() + .getSequenceAsString(); + String cdnaSeqString = cdnaSeq.getDatasetSequence() + .getSequenceAsString(); + if (aaSeqString == null || cdnaSeqString == null) + { + return null; + } + + final int mappedLength = 3 * aaSeqString.length(); + int cdnaLength = cdnaSeqString.length(); + int cdnaStart = 1; + int cdnaEnd = cdnaLength; + final int proteinStart = 1; + final int proteinEnd = aaSeqString.length(); + + /* + * If lengths don't match, try ignoring stop codon. + */ + if (cdnaLength != mappedLength) + { + for (Object stop : ResidueProperties.STOP) + { + if (cdnaSeqString.toUpperCase().endsWith((String) stop)) + { + cdnaEnd -= 3; + cdnaLength -= 3; + break; + } + } + } + + /* + * If lengths still don't match, try ignoring start codon. + */ + if (cdnaLength != mappedLength + && cdnaSeqString.toUpperCase().startsWith( + ResidueProperties.START)) + { + cdnaStart += 3; + cdnaLength -= 3; + } + + if (cdnaLength == mappedLength) + { + MapList map = new MapList(new int[] + { cdnaStart, cdnaEnd }, new int[] + { proteinStart, proteinEnd }, 3, 1); + return map; + } + else + { + return null; + } + } }