X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignedCodonFrame.java;h=6ccc0fc0c663b6458d88377696421e0381026fde;hp=4bff3a76e7061c0f3ce164f6820f9a471870577f;hb=855af27bbb88788ac9c1ee9872a43fbb333ae380;hpb=47168f025aefdaa044802bd5f8f510ffe43a4808 diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 4bff3a7..6ccc0fc 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -20,238 +20,400 @@ */ package jalview.datamodel; -import java.util.Enumeration; -import java.util.Vector; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.List; import jalview.util.MapList; +import jalview.util.MappingUtils; /** * Stores mapping between the columns of a protein alignment and a DNA alignment * and a list of individual codon to amino acid mappings between sequences. */ - public class AlignedCodonFrame { - /** - * array of nucleotide positions for aligned codons at column of aligned - * proteins. - */ - public int[][] codons = null; - - /** - * width of protein sequence alignement implicit assertion that codons.length - * >= aaWidth - */ - public int aaWidth = 0; - /** - * initialise codon frame with a nominal alignment width - * - * @param aWidth + /* + * Data bean to hold mappings from one sequence to another */ - public AlignedCodonFrame(int aWidth) + public class SequenceToSequenceMapping { - if (aWidth <= 0) + private SequenceI fromSeq; + + private Mapping mapping; + + SequenceToSequenceMapping(SequenceI from, Mapping map) { - codons = null; - return; + this.fromSeq = from; + this.mapping = map; } - codons = new int[aWidth][]; - for (int res = 0; res < aWidth; res++) - codons[res] = null; - } - /** - * ensure that codons array is at least as wide as aslen residues - * - * @param aslen - * @return (possibly newly expanded) codon array - */ - public int[][] checkCodonFrameWidth(int aslen) - { - if (codons.length <= aslen + 1) + /** + * Readable representation for debugging only, not guaranteed not to change + */ + @Override + public String toString() { - // probably never have to do this ? - int[][] c = new int[codons.length + 10][]; - for (int i = 0; i < codons.length; i++) + return String.format("From %s %s", fromSeq.getName(), + mapping.toString()); + } + + /** + * Returns a hashCode derived from the hashcodes of the mappings and fromSeq + * + * @see SequenceToSequenceMapping#hashCode() + */ + @Override + public int hashCode() + { + return (fromSeq == null ? 0 : fromSeq.hashCode() * 31) + + mapping.hashCode(); + } + + /** + * Answers true if the objects hold the same mapping between the same two + * sequences + * + * @see Mapping#equals + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof SequenceToSequenceMapping)) { - c[i] = codons[i]; - codons[i] = null; + return false; } - codons = c; + SequenceToSequenceMapping that = (SequenceToSequenceMapping) obj; + if (this.mapping == null) + { + return that.mapping == null; + } + // TODO: can simplify by asserting fromSeq is a dataset sequence + return (this.fromSeq == that.fromSeq + || (this.fromSeq != null && that.fromSeq != null + && this.fromSeq.getDatasetSequence() != null + && this.fromSeq.getDatasetSequence() == that.fromSeq + .getDatasetSequence())) + && this.mapping.equals(that.mapping); } - return codons; - } - /** - * @return width of aligned translated amino acid residues - */ - public int getaaWidth() - { - return aaWidth; - } + public SequenceI getFromSeq() + { + return fromSeq; + } - /** - * TODO: not an ideal solution - we reference the aligned amino acid sequences - * in order to make insertions on them Better would be dnaAlignment and - * aaAlignment reference.... - */ - Vector a_aaSeqs = new Vector(); + public Mapping getMapping() + { + return mapping; + } - /** - * increase aaWidth by one and insert a new aligned codon position space at - * aspos. - * - * @param aspos - */ - public void insertAAGap(int aspos, char gapCharacter) - { - // this aa appears before the aligned codons at aspos - so shift them in - // each pair of mapped sequences - aaWidth++; - if (a_aaSeqs != null) + /** + * Returns true if the mapping covers the full length of the given sequence. + * This allows us to distinguish the CDS that codes for a protein from + * another overlapping CDS in the parent dna sequence. + * + * @param seq + * @return + */ + public boolean covers(SequenceI seq) { - // we actually have to modify the aligned sequences here, so use the - // a_aaSeqs vector - Enumeration sq = a_aaSeqs.elements(); - while (sq.hasMoreElements()) + return covers(seq,false,false); + } + /** + * + * @param seq + * @param localCover - when true - compare extent of seq's dataset sequence rather than the local extent + * @param either - when true coverage is required for either seq or the mapped sequence + * @return true if mapping covers full length of given sequence (or the other if either==true) + */ + public boolean covers(SequenceI seq, boolean localCover,boolean either) + { + List mappedRanges = null,otherRanges=null; + MapList mapList = mapping.getMap(); + int mstart=seq.getStart(),mend=seq.getEnd(),ostart,oend; + ; + if (fromSeq == seq || fromSeq == seq.getDatasetSequence()) { - ((SequenceI) sq.nextElement()).insertCharAt(aspos, gapCharacter); + if (localCover && fromSeq !=seq) + { + mstart=fromSeq.getStart(); + mend=fromSeq.getEnd(); + } + mappedRanges = mapList.getFromRanges(); + otherRanges=mapList.getToRanges(); + ostart=mapping.to.getStart(); + oend=mapping.to.getEnd(); } + else if (mapping.to == seq || mapping.to == seq.getDatasetSequence()) + { + if (localCover && mapping.to !=seq) + { + mstart=mapping.to.getStart(); + mend=mapping.to.getEnd(); + } + mappedRanges = mapList.getToRanges(); + otherRanges=mapList.getFromRanges(); + ostart=fromSeq.getStart(); + oend=fromSeq.getEnd(); + } + else + { + return false; + } + + /* + * check that each mapped range lies within the sequence range + * (necessary for circular CDS - example EMBL:J03321:AAA91567) + * and mapped length covers (at least) sequence length + */ + int length = countRange(mappedRanges,mstart,mend); + + if (length != -1) + { + // add 3 to mapped length to allow for a mapped stop codon + if (length + 3 >= (mend - mstart + 1)) + { + return true; + } + } + if (either) + { + // also check coverage of the other range + length = countRange(otherRanges, ostart, oend); + if (length != -1) + { + if (length + 1 >= (oend - ostart + 1)) + { + return true; + } + } + } + return false; } - checkCodonFrameWidth(aspos); - if (aspos < aaWidth) + private int countRange(List mappedRanges,int mstart,int mend) { - aaWidth++; - System.arraycopy(codons, aspos, codons, aspos + 1, codons.length - aspos - 1); - codons[aspos] = null; // clear so new codon position can be marked. + int length=0; + for (int[] range : mappedRanges) + { + int from = Math.min(range[0], range[1]); + int to = Math.max(range[0], range[1]); + if (from < mstart || to > mend) + { + return -1; + } + length += (to - from + 1); + } + return length; } - } - public void setAaWidth(int aapos) - { - aaWidth = aapos; + /** + * Adds any regions mapped to or from position {@code pos} in sequence + * {@code seq} to the given search results + * + * @param seq + * @param pos + * @param sr + */ + public void markMappedRegion(SequenceI seq, int pos, SearchResultsI sr) + { + int[] codon = null; + SequenceI mappedSeq = null; + SequenceI ds = seq.getDatasetSequence(); + if (ds == null) + { + ds = seq; + } + + if (this.fromSeq == seq || this.fromSeq == ds) + { + codon = this.mapping.map.locateInTo(pos, pos); + mappedSeq = this.mapping.to; + } + else if (this.mapping.to == seq || this.mapping.to == ds) + { + codon = this.mapping.map.locateInFrom(pos, pos); + mappedSeq = this.fromSeq; + } + + if (codon != null) + { + for (int i = 0; i < codon.length; i += 2) + { + sr.addResult(mappedSeq, codon[i], codon[i + 1]); + } + } + } } + private List mappings; + /** - * tied array of na Sequence objects. + * Constructor */ - SequenceI[] dnaSeqs = null; + public AlignedCodonFrame() + { + mappings = new ArrayList<>(); + } /** - * tied array of Mappings to protein sequence Objects and SequenceI[] - * aaSeqs=null; MapLists where eac maps from the corresponding dnaSeqs element - * to corresponding aaSeqs element + * Adds a mapping between the dataset sequences for the associated dna and + * protein sequence objects + * + * @param dnaseq + * @param aaseq + * @param map */ - Mapping[] dnaToProt = null; + public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map) + { + addMap(dnaseq, aaseq, map, null); + } /** - * add a mapping between the dataset sequences for the associated dna and + * Adds a mapping between the dataset sequences for the associated dna and * protein sequence objects * * @param dnaseq * @param aaseq * @param map + * @param mapFromId */ - public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map) + public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map, + String mapFromId) { - int nlen = 1; - if (dnaSeqs != null) - { - nlen = dnaSeqs.length + 1; - } - SequenceI[] ndna = new SequenceI[nlen]; - Mapping[] ndtp = new Mapping[nlen]; - if (dnaSeqs != null) - { - System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length); - System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length); - } - dnaSeqs = ndna; - dnaToProt = ndtp; - nlen--; - dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq - .getDatasetSequence(); - Mapping mp = new Mapping(map); // JBPNote DEBUG! THIS ! // dnaseq.transferAnnotation(aaseq, mp); // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse())); - mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq - .getDatasetSequence(); - a_aaSeqs.addElement(aaseq); - dnaToProt[nlen] = mp; + + SequenceI fromSeq = (dnaseq.getDatasetSequence() == null) ? dnaseq + : dnaseq.getDatasetSequence(); + SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? aaseq + : aaseq.getDatasetSequence(); + + /* + * if we already hold a mapping between these sequences, just add to it + * note that 'adding' a duplicate map does nothing; this protects against + * creating duplicate mappings in AlignedCodonFrame + */ + for (SequenceToSequenceMapping ssm : mappings) + { + if (ssm.fromSeq == fromSeq && ssm.mapping.to == toSeq) + { + ssm.mapping.map.addMapList(map); + return; + } + } + + /* + * otherwise, add a new sequence mapping + */ + Mapping mp = new Mapping(toSeq, map); + mp.setMappedFromId(mapFromId); + mappings.add(new SequenceToSequenceMapping(fromSeq, mp)); } public SequenceI[] getdnaSeqs() { - return dnaSeqs; + // TODO return a list instead? + // return dnaSeqs; + List seqs = new ArrayList<>(); + for (SequenceToSequenceMapping ssm : mappings) + { + seqs.add(ssm.fromSeq); + } + return seqs.toArray(new SequenceI[seqs.size()]); } public SequenceI[] getAaSeqs() { - if (dnaToProt == null) - return null; - SequenceI[] sqs = new SequenceI[dnaToProt.length]; - for (int sz = 0; sz < dnaToProt.length; sz++) + // TODO not used - remove? + List seqs = new ArrayList<>(); + for (SequenceToSequenceMapping ssm : mappings) { - sqs[sz] = dnaToProt[sz].to; + seqs.add(ssm.mapping.to); } - return sqs; + return seqs.toArray(new SequenceI[seqs.size()]); } public MapList[] getdnaToProt() { - if (dnaToProt == null) - return null; - MapList[] sqs = new MapList[dnaToProt.length]; - for (int sz = 0; sz < dnaToProt.length; sz++) + List maps = new ArrayList<>(); + for (SequenceToSequenceMapping ssm : mappings) { - sqs[sz] = dnaToProt[sz].map; + maps.add(ssm.mapping.map); } - return sqs; + return maps.toArray(new MapList[maps.size()]); } public Mapping[] getProtMappings() { - return dnaToProt; + List maps = new ArrayList<>(); + for (SequenceToSequenceMapping ssm : mappings) + { + maps.add(ssm.mapping); + } + return maps.toArray(new Mapping[maps.size()]); } /** + * Returns the first mapping found which is to or from the given sequence, or + * null if none is found * - * @param sequenceRef - * @return null or corresponding aaSeq entry for dnaSeq entry + * @param seq + * @return */ - public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) + public Mapping getMappingForSequence(SequenceI seq) { - if (dnaSeqs == null) + SequenceI seqDs = seq.getDatasetSequence(); + seqDs = seqDs != null ? seqDs : seq; + + for (SequenceToSequenceMapping ssm : mappings) { - return null; + if (ssm.fromSeq == seqDs || ssm.mapping.to == seqDs) + { + return ssm.mapping; + } } + return null; + } + + /** + * Return the corresponding aligned or dataset aa sequence for given dna + * sequence, null if not found. + * + * @param sequenceRef + * @return + */ + public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) + { SequenceI dnads = dnaSeqRef.getDatasetSequence(); - for (int ds = 0; ds < dnaSeqs.length; ds++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads) - return dnaToProt[ds].to; + if (ssm.fromSeq == dnaSeqRef || ssm.fromSeq == dnads) + { + return ssm.mapping.to; + } } return null; } /** + * Return the corresponding aligned or dataset dna sequence for given amino + * acid sequence, or null if not found. returns the sequence from the first + * mapping found that involves the protein sequence. * - * @param sequenceRef - * @return null or corresponding aaSeq entry for dnaSeq entry + * @param aaSeqRef + * @return */ public SequenceI getDnaForAaSeq(SequenceI aaSeqRef) { - if (dnaToProt == null) - { - return null; - } SequenceI aads = aaSeqRef.getDatasetSequence(); - for (int as = 0; as < dnaToProt.length; as++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads) - return dnaSeqs[as]; + if (ssm.mapping.to == aaSeqRef || ssm.mapping.to == aads) + { + return ssm.fromSeq; + } } return null; } @@ -271,7 +433,8 @@ public class AlignedCodonFrame /** * Add search results for regions in other sequences that translate or are - * translated from a particular position in seq + * translated from a particular position in seq (which may be an aligned or + * dataset sequence) * * @param seq * @param index @@ -280,42 +443,543 @@ public class AlignedCodonFrame * where highlighted regions go */ public void markMappedRegion(SequenceI seq, int index, - SearchResults results) + SearchResultsI results) { - if (dnaToProt == null) + SequenceI ds = seq.getDatasetSequence(); + if (ds == null) { - return; + ds = seq; } - int[] codon; - SequenceI ds = seq.getDatasetSequence(); - for (int mi = 0; mi < dnaToProt.length; mi++) + for (SequenceToSequenceMapping ssm : mappings) + { + ssm.markMappedRegion(ds, index, results); + } + } + + /** + * Returns the DNA codon positions (base 1) for the given position (base 1) in + * a mapped protein sequence, or null if no mapping is found. + * + * Intended for use in aligning cDNA to match aligned protein. Only the first + * mapping found is returned, so not suitable for use if multiple protein + * sequences are mapped to the same cDNA (but aligning cDNA as protein is + * ill-defined for this case anyway). + * + * @param seq + * the DNA dataset sequence + * @param aaPos + * residue position (base 1) in a protein sequence + * @return + */ + public int[] getDnaPosition(SequenceI seq, int aaPos) + { + /* + * Adapted from markMappedRegion(). + */ + MapList ml = null; + int i = 0; + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds) + if (ssm.fromSeq == seq) { - // DEBUG System.err.println("dna pos "+index); - codon = dnaToProt[mi].map.locateInTo(index, index); - if (codon != null) + ml = getdnaToProt()[i]; + break; + } + i++; + } + return ml == null ? null : ml.locateInFrom(aaPos, aaPos); + } + + /** + * Convenience method to return the first aligned sequence in the given + * alignment whose dataset has a mapping with the given (aligned or dataset) + * sequence. + * + * @param seq + * + * @param al + * @return + */ + public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al) + { + return findAlignedSequence(seq, al, null); + } + /** + * Convenience method to return the first aligned sequence in the given + * alignment whose dataset has a mapping with the given (aligned or dataset) + * sequence, and optionally the mapping that relates them + * + * @param seq + * @param al + * @param map - list to add the mapping to + * @return sequence from al that maps to seq + */ + public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al,List map) + { + /* + * Search mapped protein ('to') sequences first. + */ + for (SequenceToSequenceMapping ssm : mappings) + { + int mStart=ssm.getMapping().getMap().getFromLowest(),mEnd=ssm.getMapping().map.getFromHighest(); + if ((ssm.fromSeq == seq || ssm.fromSeq == seq.getDatasetSequence()) + // here AlignmentUtilsTest. testAlignProteinAsDna_incompleteStartCodon fails because mStart/mEnd is contained by seq + // without this filter, we don't get the correct mapping, however + )// && seq.getStart()>=mStart && seq.getEnd()<=mEnd) + { + for (SequenceI sourceAligned : al.getSequences()) { - for (int i = 0; i < codon.length; i += 2) + if (ssm.covers(sourceAligned,true,false)) { - results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]); + if (map != null) + { + map.add(ssm); + } + return sourceAligned; } } } - else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds) + } + + /* + * Then try mapped dna sequences. + */ + for (SequenceToSequenceMapping ssm : mappings) + { + int mStart=ssm.getMapping().getMap().getToLowest(),mEnd=ssm.getMapping().map.getToHighest(); + if ((ssm.mapping.to == seq + || ssm.mapping.to == seq.getDatasetSequence()) + && seq.getStart()>=mStart && seq.getEnd()<=mEnd) { - // DEBUG System.err.println("aa pos "+index); + for (SequenceI sourceAligned : al.getSequences()) { - codon = dnaToProt[mi].map.locateInFrom(index, index); - if (codon != null) + if (ssm.covers(sourceAligned,true,true)) { - for (int i = 0; i < codon.length; i += 2) + if (map != null) { - results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]); + map.add(ssm); } + return sourceAligned; } } } } + + return null; + } + + /** + * Returns the region in the target sequence's dataset that is mapped to the + * given position (base 1) in the query sequence's dataset. The region is a + * set of start/end position pairs. + * + * @param target + * @param query + * @param queryPos + * @return + */ + public int[] getMappedRegion(SequenceI target, SequenceI query, + int queryPos) + { + SequenceI targetDs = target.getDatasetSequence() == null ? target + : target.getDatasetSequence(); + SequenceI queryDs = query.getDatasetSequence() == null ? query + : query.getDatasetSequence(); + if (targetDs == null || queryDs == null /*|| dnaToProt == null*/) + { + return null; + } + for (SequenceToSequenceMapping ssm : mappings) + { + /* + * try mapping from target to query + */ + if (ssm.fromSeq == targetDs && ssm.mapping.to == queryDs) + { + int[] codon = ssm.mapping.map.locateInFrom(queryPos, queryPos); + if (codon != null) + { + return codon; + } + } + /* + * else try mapping from query to target + */ + else if (ssm.fromSeq == queryDs && ssm.mapping.to == targetDs) + { + int[] codon = ssm.mapping.map.locateInTo(queryPos, queryPos); + if (codon != null) + { + return codon; + } + } + } + return null; + } + + /** + * Returns the mapped DNA codons for the given position in a protein sequence, + * or null if no mapping is found. Returns a list of (e.g.) ['g', 'c', 't'] + * codons. There may be more than one codon mapped to the protein if (for + * example), there are mappings to cDNA variants. + * + * @param protein + * the peptide dataset sequence + * @param aaPos + * residue position (base 1) in the peptide sequence + * @return + */ + public List getMappedCodons(SequenceI protein, int aaPos) + { + MapList ml = null; + SequenceI dnaSeq = null; + List result = new ArrayList<>(); + + for (SequenceToSequenceMapping ssm : mappings) + { + if (ssm.mapping.to == protein + && ssm.mapping.getMap().getFromRatio() == 3) + { + ml = ssm.mapping.map; + dnaSeq = ssm.fromSeq; + + int[] codonPos = ml.locateInFrom(aaPos, aaPos); + if (codonPos == null) + { + return null; + } + + /* + * Read off the mapped nucleotides (converting to position base 0) + */ + codonPos = MappingUtils.flattenRanges(codonPos); + int start = dnaSeq.getStart(); + char c1 = dnaSeq.getCharAt(codonPos[0] - start); + char c2 = dnaSeq.getCharAt(codonPos[1] - start); + char c3 = dnaSeq.getCharAt(codonPos[2] - start); + result.add(new char[] { c1, c2, c3 }); + } + } + return result.isEmpty() ? null : result; + } + + /** + * Returns any mappings found which are from the given sequence, and to + * distinct sequences. + * + * @param seq + * @return + */ + public List getMappingsFromSequence(SequenceI seq) + { + List result = new ArrayList<>(); + List related = new ArrayList<>(); + SequenceI seqDs = seq.getDatasetSequence(); + seqDs = seqDs != null ? seqDs : seq; + + for (SequenceToSequenceMapping ssm : mappings) + { + final Mapping mapping = ssm.mapping; + if (ssm.fromSeq == seqDs) + { + if (!related.contains(mapping.to)) + { + result.add(mapping); + related.add(mapping.to); + } + } + } + return result; + } + + /** + * Test whether the given sequence is substitutable for one or more dummy + * sequences in this mapping + * + * @param map + * @param seq + * @return + */ + public boolean isRealisableWith(SequenceI seq) + { + return realiseWith(seq, false) > 0; + } + + /** + * Replace any matchable mapped dummy sequences with the given real one. + * Returns the count of sequence mappings instantiated. + * + * @param seq + * @return + */ + public int realiseWith(SequenceI seq) + { + return realiseWith(seq, true); + } + + /** + * Returns the number of mapped dummy sequences that could be replaced with + * the given real sequence. + * + * @param seq + * a dataset sequence + * @param doUpdate + * if true, performs replacements, else only counts + * @return + */ + protected int realiseWith(SequenceI seq, boolean doUpdate) + { + SequenceI ds = seq.getDatasetSequence() != null + ? seq.getDatasetSequence() + : seq; + int count = 0; + + /* + * check for replaceable DNA ('map from') sequences + */ + for (SequenceToSequenceMapping ssm : mappings) + { + SequenceI dna = ssm.fromSeq; + if (dna instanceof SequenceDummy + && dna.getName().equals(ds.getName())) + { + Mapping mapping = ssm.mapping; + int mapStart = mapping.getMap().getFromLowest(); + int mapEnd = mapping.getMap().getFromHighest(); + boolean mappable = couldRealiseSequence(dna, ds, mapStart, mapEnd); + if (mappable) + { + count++; + if (doUpdate) + { + // TODO: new method ? ds.realise(dna); + // might want to copy database refs as well + ds.setSequenceFeatures(dna.getSequenceFeatures()); + // dnaSeqs[i] = ds; + ssm.fromSeq = ds; + System.out.println("Realised mapped sequence " + ds.getName()); + } + } + } + + /* + * check for replaceable protein ('map to') sequences + */ + Mapping mapping = ssm.mapping; + SequenceI prot = mapping.getTo(); + int mapStart = mapping.getMap().getToLowest(); + int mapEnd = mapping.getMap().getToHighest(); + boolean mappable = couldRealiseSequence(prot, ds, mapStart, mapEnd); + if (mappable) + { + count++; + if (doUpdate) + { + // TODO: new method ? ds.realise(dna); + // might want to copy database refs as well + ds.setSequenceFeatures(dna.getSequenceFeatures()); + ssm.mapping.setTo(ds); + } + } + } + return count; + } + + /** + * Helper method to test whether a 'real' sequence could replace a 'dummy' + * sequence in the map. The criteria are that they have the same name, and + * that the mapped region overlaps the candidate sequence. + * + * @param existing + * @param replacement + * @param mapStart + * @param mapEnd + * @return + */ + protected static boolean couldRealiseSequence(SequenceI existing, + SequenceI replacement, int mapStart, int mapEnd) + { + if (existing instanceof SequenceDummy + && !(replacement instanceof SequenceDummy) + && existing.getName().equals(replacement.getName())) + { + int start = replacement.getStart(); + int end = replacement.getEnd(); + boolean mappingOverlapsSequence = (mapStart >= start + && mapStart <= end) || (mapEnd >= start && mapEnd <= end); + if (mappingOverlapsSequence) + { + return true; + } + } + return false; + } + + /** + * Change any mapping to the given sequence to be to its dataset sequence + * instead. For use when mappings are created before their referenced + * sequences are instantiated, for example when parsing GFF data. + * + * @param seq + */ + public void updateToDataset(SequenceI seq) + { + if (seq == null || seq.getDatasetSequence() == null) + { + return; + } + SequenceI ds = seq.getDatasetSequence(); + + for (SequenceToSequenceMapping ssm : mappings) + /* + * 'from' sequences + */ + { + if (ssm.fromSeq == seq) + { + ssm.fromSeq = ds; + } + + /* + * 'to' sequences + */ + if (ssm.mapping.to == seq) + { + ssm.mapping.to = ds; + } + } + } + + /** + * Answers true if this object contains no mappings + * + * @return + */ + public boolean isEmpty() + { + return mappings.isEmpty(); + } + + /** + * Method for debug / inspection purposes only, may change in future + */ + @Override + public String toString() + { + return mappings == null ? "null" : mappings.toString(); + } + + /** + * Returns the first mapping found that is between 'fromSeq' and 'toSeq', or + * null if none found + * + * @param fromSeq + * aligned or dataset sequence + * @param toSeq + * aligned or dataset sequence + * @return + */ + public Mapping getMappingBetween(SequenceI fromSeq, SequenceI toSeq) + { + SequenceI dssFrom = fromSeq.getDatasetSequence() == null ? fromSeq + : fromSeq.getDatasetSequence(); + SequenceI dssTo = toSeq.getDatasetSequence() == null ? toSeq + : toSeq.getDatasetSequence(); + + for (SequenceToSequenceMapping mapping : mappings) + { + SequenceI from = mapping.fromSeq; + SequenceI to = mapping.mapping.to; + if ((from == dssFrom && to == dssTo) + || (from == dssTo && to == dssFrom)) + { + return mapping.mapping; + } + } + return null; + } + + /** + * Returns a hashcode derived from the list of sequence mappings + * + * @see SequenceToSequenceMapping#hashCode() + * @see AbstractList#hashCode() + */ + @Override + public int hashCode() + { + return this.mappings.hashCode(); + } + + /** + * Two AlignedCodonFrame objects are equal if they hold the same ordered list + * of mappings + * + * @see SequenceToSequenceMapping#equals + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof AlignedCodonFrame)) + { + return false; + } + return this.mappings.equals(((AlignedCodonFrame) obj).mappings); + } + + public List getMappings() + { + return mappings; + } + + /** + * Returns the first mapping found which is between the two given sequences, + * and covers the full extent of both. + * + * @param seq1 + * @param seq2 + * @return + */ + public SequenceToSequenceMapping getCoveringMapping(SequenceI seq1, + SequenceI seq2) + { + for (SequenceToSequenceMapping mapping : mappings) + { + if (mapping.covers(seq2) && mapping.covers(seq1)) + { + return mapping; + } + } + return null; + } + + /** + * Returns the first mapping found which is between the given dataset sequence + * and another, is a triplet mapping (3:1 or 1:3), and covers the full extent + * of both sequences involved + * + * @param seq + * @return + */ + public SequenceToSequenceMapping getCoveringCodonMapping(SequenceI seq) + { + for (SequenceToSequenceMapping mapping : mappings) + { + if (mapping.getMapping().getMap().isTripletMap() + && mapping.covers(seq)) + { + if (mapping.fromSeq == seq + && mapping.covers(mapping.getMapping().getTo())) + { + return mapping; + } + else if (mapping.getMapping().getTo() == seq + && mapping.covers(mapping.fromSeq)) + { + return mapping; + } + } + } + return null; } }