X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=300c950bdb28e09c2a504545e51a6ecc04693724;hb=2f87d72ea4f1bbf768918a253be05771f9c430cb;hp=ef961d09c419da80074f27865f49b066212d6459;hpb=838e4f91d4a53dd315640dbc9ff6ef7a815ee576;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index ef961d0..300c950 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b1) - * Copyright (C) 2015 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -21,14 +21,20 @@ package jalview.datamodel; import jalview.analysis.AlignmentUtils; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.io.FastaFile; +import jalview.util.Comparison; +import jalview.util.LinkedIdentityHashSet; import jalview.util.MessageManager; import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; import java.util.Enumeration; import java.util.HashSet; import java.util.Hashtable; -import java.util.LinkedHashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -43,49 +49,40 @@ import java.util.Vector; */ public class Alignment implements AlignmentI { - protected Alignment dataset; + private Alignment dataset; - protected List sequences; + private List sequences; - protected List groups = java.util.Collections - .synchronizedList(new ArrayList()); + protected List groups; protected char gapCharacter = '-'; - protected int type = NUCLEOTIDE; - - public static final int PROTEIN = 0; - - public static final int NUCLEOTIDE = 1; + private boolean nucleotide = true; public boolean hasRNAStructure = false; - /** DOCUMENT ME!! */ public AlignmentAnnotation[] annotations; - HiddenSequences hiddenSequences = new HiddenSequences(this); + HiddenSequences hiddenSequences; + + HiddenColumns hiddenCols; public Hashtable alignmentProperties; - private Set codonFrameList = new LinkedHashSet(); + private List codonFrameList; private void initAlignment(SequenceI[] seqs) { - int i = 0; + groups = Collections.synchronizedList(new ArrayList()); + hiddenSequences = new HiddenSequences(this); + hiddenCols = new HiddenColumns(); + codonFrameList = new ArrayList<>(); - if (jalview.util.Comparison.isNucleotide(seqs)) - { - type = NUCLEOTIDE; - } - else - { - type = PROTEIN; - } + nucleotide = Comparison.isNucleotide(seqs); - sequences = java.util.Collections - .synchronizedList(new ArrayList()); + sequences = Collections.synchronizedList(new ArrayList()); - for (i = 0; i < seqs.length; i++) + for (int i = 0; i < seqs.length; i++) { sequences.add(seqs[i]); } @@ -104,13 +101,15 @@ public class Alignment implements AlignmentI seqs[i] = new Sequence(seqs[i]); } + initAlignment(seqs); + /* - * Share the same dataset sequence mappings (if any). TODO: find a better - * place for these to live (alignment dataset?). + * Share the same dataset sequence mappings (if any). */ - this.codonFrameList = ((Alignment) al).codonFrameList; - - initAlignment(seqs); + if (dataset == null && al.getDataset() == null) + { + this.setCodonFrames(al.getCodonFrames()); + } } /** @@ -132,7 +131,7 @@ public class Alignment implements AlignmentI public Alignment(SeqCigar[] alseqs) { SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, - gapCharacter, new ColumnSelection(), null); + gapCharacter, new HiddenColumns(), null); initAlignment(seqs); } @@ -147,9 +146,8 @@ public class Alignment implements AlignmentI */ public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error( - MessageManager - .getString("error.alignment_cigararray_not_implemented")); + throw new Error(MessageManager + .getString("error.alignment_cigararray_not_implemented")); // this(compactAlignment.refCigars); } @@ -192,29 +190,47 @@ public class Alignment implements AlignmentI return AlignmentUtils.getSequencesByName(this); } - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ @Override public SequenceI getSequenceAt(int i) { synchronized (sequences) { + if (i > -1 && i < sequences.size()) { return sequences.get(i); } } + return null; } + @Override + public SequenceI getSequenceAtAbsoluteIndex(int i) + { + SequenceI seq = null; + if (getHiddenSequences().getSize() > 0) + { + seq = getHiddenSequences().getHiddenSequence(i); + if (seq == null) + { + // didn't find the sequence in the hidden sequences, get it from the + // alignment + int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i); + seq = getSequenceAt(index); + } + } + else + { + seq = getSequenceAt(i); + } + return seq; + } + /** - * Adds a sequence to the alignment. Recalculates maxLength and size. + * Adds a sequence to the alignment. Recalculates maxLength and size. Note + * this currently does not recalculate whether or not the alignment is + * nucleotide, so mixed alignments may have undefined behaviour. * * @param snew */ @@ -223,18 +239,21 @@ public class Alignment implements AlignmentI { if (dataset != null) { + // maintain dataset integrity - if (snew.getDatasetSequence() != null) - { - getDataset().addSequence(snew.getDatasetSequence()); - } - else + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { // derive new sequence SequenceI adding = snew.deriveSequence(); - getDataset().addSequence(adding.getDatasetSequence()); snew = adding; + dsseq = snew.getDatasetSequence(); } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); + } + } if (sequences == null) { @@ -253,18 +272,22 @@ public class Alignment implements AlignmentI } } - /** - * Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ @Override - public void setSequenceAt(int i, SequenceI snew) + public SequenceI replaceSequenceAt(int i, SequenceI snew) { synchronized (sequences) { - deleteSequence(i); - sequences.set(i, snew); + if (sequences.size() > i) + { + return sequences.set(i, snew); + + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); + } + return null; } } @@ -280,13 +303,23 @@ public class Alignment implements AlignmentI } @Override - public void finalize() + public void finalize() throws Throwable { if (getDataset() != null) { getDataset().removeAlignmentRef(); } + nullReferences(); + super.finalize(); + } + + /** + * Defensively nulls out references in case this object is not garbage + * collected + */ + void nullReferences() + { dataset = null; sequences = null; groups = null; @@ -295,41 +328,34 @@ public class Alignment implements AlignmentI } /** - * decrement the alignmentRefs counter by one and call finalize if it goes to - * zero. + * decrement the alignmentRefs counter by one and null references if it goes + * to zero. + * + * @throws Throwable */ - private void removeAlignmentRef() + private void removeAlignmentRef() throws Throwable { if (--alignmentRefs == 0) { - finalize(); + nullReferences(); } } - /** - * DOCUMENT ME! - * - * @param s - * DOCUMENT ME! - */ @Override public void deleteSequence(SequenceI s) { - deleteSequence(findIndex(s)); + synchronized (sequences) + { + deleteSequence(findIndex(s)); + } } - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - */ @Override public void deleteSequence(int i) { - if (i > -1 && i < getHeight()) + synchronized (sequences) { - synchronized (sequences) + if (i > -1 && i < getHeight()) { sequences.remove(i); hiddenSequences.adjustHeightSequenceDeleted(i); @@ -337,23 +363,36 @@ public class Alignment implements AlignmentI } } + @Override + public void deleteHiddenSequence(int i) + { + synchronized (sequences) + { + if (i > -1 && i < getHeight()) + { + sequences.remove(i); + } + } + } + /* * (non-Javadoc) * * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI) */ @Override - public SequenceGroup findGroup(SequenceI s) + public SequenceGroup findGroup(SequenceI seq, int position) { synchronized (groups) { - for (int i = 0; i < this.groups.size(); i++) + for (SequenceGroup sg : groups) { - SequenceGroup sg = groups.get(i); - - if (sg.getSequences(null).contains(s)) + if (sg.getSequences(null).contains(seq)) { - return sg; + if (position >= sg.getStartRes() && position <= sg.getEndRes()) + { + return sg; + } } } } @@ -369,7 +408,7 @@ public class Alignment implements AlignmentI @Override public SequenceGroup[] findAllGroups(SequenceI s) { - ArrayList temp = new ArrayList(); + ArrayList temp = new ArrayList<>(); synchronized (groups) { @@ -420,7 +459,7 @@ public class Alignment implements AlignmentI return; } } - sg.setContext(this); + sg.setContext(this, true); groups.add(sg); } } @@ -439,7 +478,9 @@ public class Alignment implements AlignmentI return; } // remove annotation very quickly - AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length]; + AlignmentAnnotation[] t, + todelete = new AlignmentAnnotation[annotations.length], + tokeep = new AlignmentAnnotation[annotations.length]; int i, p, k; if (gp == null) { @@ -497,7 +538,7 @@ public class Alignment implements AlignmentI } for (SequenceGroup sg : groups) { - sg.setContext(null); + sg.setContext(null, false); } groups.clear(); } @@ -513,7 +554,7 @@ public class Alignment implements AlignmentI { removeAnnotationForGroup(g); groups.remove(g); - g.setContext(null); + g.setContext(null, false); } } } @@ -549,11 +590,12 @@ public class Alignment implements AlignmentI int i = 0; SequenceI sq = null; String sqname = null; + int nseq = sequences.size(); if (startAfter != null) { // try to find the sequence in the alignment boolean matched = false; - while (i < sequences.size()) + while (i < nseq) { if (getSequenceAt(i++) == startAfter) { @@ -566,13 +608,13 @@ public class Alignment implements AlignmentI i = 0; } } - while (i < sequences.size()) + while (i < nseq) { sq = getSequenceAt(i); sqname = sq.getName(); if (sqname.equals(token) // exact match || (b && // allow imperfect matches - case varies - (sqname.equalsIgnoreCase(token)))) + (sqname.equalsIgnoreCase(token)))) { return getSequenceAt(i); } @@ -638,7 +680,7 @@ public class Alignment implements AlignmentI * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults) */ @Override - public int findIndex(SearchResults results) + public int findIndex(SearchResultsI results) { int i = 0; @@ -653,35 +695,27 @@ public class Alignment implements AlignmentI return -1; } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ @Override public int getHeight() { return sequences.size(); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ + @Override + public int getAbsoluteHeight() + { + return sequences.size() + getHiddenSequences().getSize(); + } + @Override public int getWidth() { int maxLength = -1; - + for (int i = 0; i < sequences.size(); i++) { - if (getSequenceAt(i).getLength() > maxLength) - { - maxLength = getSequenceAt(i).getLength(); - } + maxLength = Math.max(maxLength, getSequenceAt(i).getLength()); } - return maxLength; } @@ -754,6 +788,12 @@ public class Alignment implements AlignmentI return true; } + @Override + public boolean isHidden(int alignmentIndex) + { + return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null); + } + /** * Delete all annotations, including auto-calculated if the flag is set true. * Returns true if at least one annotation was deleted, else false. @@ -954,29 +994,9 @@ public class Alignment implements AlignmentI } @Override - public void setNucleotide(boolean b) - { - if (b) - { - type = NUCLEOTIDE; - } - else - { - type = PROTEIN; - } - } - - @Override public boolean isNucleotide() { - if (type == NUCLEOTIDE) - { - return true; - } - else - { - return false; - } + return nucleotide; } @Override @@ -987,33 +1007,24 @@ public class Alignment implements AlignmentI } @Override - public void setDataset(Alignment data) + public void setDataset(AlignmentI data) { if (dataset == null && data == null) { - // Create a new dataset for this alignment. - // Can only be done once, if dataset is not null - // This will not be performed - SequenceI[] seqs = new SequenceI[getHeight()]; - SequenceI currentSeq; - for (int i = 0; i < getHeight(); i++) - { - currentSeq = getSequenceAt(i); - if (currentSeq.getDatasetSequence() != null) - { - seqs[i] = currentSeq.getDatasetSequence(); - } - else - { - seqs[i] = currentSeq.createDatasetSequence(); - } - } - - dataset = new Alignment(seqs); + createDatasetAlignment(); } else if (dataset == null && data != null) { - dataset = data; + if (data == this) + { + throw new IllegalArgumentException("Circular dataset reference"); + } + if (!(data instanceof Alignment)) + { + throw new Error( + "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference"); + } + dataset = (Alignment) data; for (int i = 0; i < getHeight(); i++) { SequenceI currentSeq = getSequenceAt(i); @@ -1040,6 +1051,107 @@ public class Alignment implements AlignmentI } /** + * add dataset sequences to seq for currentSeq and any sequences it references + */ + private void resolveAndAddDatasetSeq(SequenceI currentSeq, + Set seqs, boolean createDatasetSequence) + { + SequenceI alignedSeq = currentSeq; + if (currentSeq.getDatasetSequence() != null) + { + currentSeq = currentSeq.getDatasetSequence(); + } + else + { + if (createDatasetSequence) + { + currentSeq = currentSeq.createDatasetSequence(); + } + } + + List toProcess = new ArrayList<>(); + toProcess.add(currentSeq); + while (toProcess.size() > 0) + { + // use a queue ? + SequenceI curDs = toProcess.remove(0); + + if (!seqs.add(curDs)) + { + continue; + } + // iterate over database references, making sure we add forward referenced + // sequences + if (curDs.getDBRefs() != null) + { + for (DBRefEntry dbr : curDs.getDBRefs()) + { + if (dbr.getMap() != null && dbr.getMap().getTo() != null) + { + if (dbr.getMap().getTo() == alignedSeq) + { + /* + * update mapping to be to the newly created dataset sequence + */ + dbr.getMap().setTo(currentSeq); + } + if (dbr.getMap().getTo().getDatasetSequence() != null) + { + throw new Error("Implementation error: Map.getTo() for dbref " + + dbr + " from " + curDs.getName() + + " is not a dataset sequence."); + } + // we recurse to add all forward references to dataset sequences via + // DBRefs/etc + toProcess.add(dbr.getMap().getTo()); + } + } + } + } + } + + /** + * Creates a new dataset for this alignment. Can only be done once - if + * dataset is not null this will not be performed. + */ + public void createDatasetAlignment() + { + if (dataset != null) + { + return; + } + // try to avoid using SequenceI.equals at this stage, it will be expensive + Set seqs = new LinkedIdentityHashSet<>(); + + for (int i = 0; i < getHeight(); i++) + { + SequenceI currentSeq = getSequenceAt(i); + resolveAndAddDatasetSeq(currentSeq, seqs, true); + } + + // verify all mappings are in dataset + for (AlignedCodonFrame cf : codonFrameList) + { + for (SequenceToSequenceMapping ssm : cf.getMappings()) + { + if (!seqs.contains(ssm.getFromSeq())) + { + resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false); + } + if (!seqs.contains(ssm.getMapping().getTo())) + { + resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false); + } + } + } + // finally construct dataset + dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); + // move mappings to the dataset alignment + dataset.codonFrameList = this.codonFrameList; + this.codonFrameList = null; + } + + /** * reference count for number of alignments referencing this one. */ int alignmentRefs = 0; @@ -1124,8 +1236,8 @@ public class Alignment implements AlignmentI current = getSequenceAt(i); // This should really be a sequence method ends[i * 2] = current.findIndex(current.getStart()); - ends[i * 2 + 1] = current.findIndex(current.getStart() - + current.getLength()); + ends[i * 2 + 1] = current + .findIndex(current.getStart() + current.getLength()); boolean hitres = false; for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++) { @@ -1215,6 +1327,12 @@ public class Alignment implements AlignmentI } @Override + public HiddenColumns getHiddenColumns() + { + return hiddenCols; + } + + @Override public CigarArray getCompactAlignment() { synchronized (sequences) @@ -1261,19 +1379,17 @@ public class Alignment implements AlignmentI return alignmentProperties; } - /* - * (non-Javadoc) - * - * @see - * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame - * ) + /** + * Adds the given mapping to the stored set. Note this may be held on the + * dataset alignment. */ @Override public void addCodonFrame(AlignedCodonFrame codons) { - if (codons != null) + List acfs = getCodonFrames(); + if (codons != null && acfs != null && !acfs.contains(codons)) { - codonFrameList.add(codons); + acfs.add(codons); } } @@ -1290,8 +1406,8 @@ public class Alignment implements AlignmentI { return null; } - List cframes = new ArrayList(); - for (AlignedCodonFrame acf : codonFrameList) + List cframes = new ArrayList<>(); + for (AlignedCodonFrame acf : getCodonFrames()) { if (acf.involvesSequence(seq)) { @@ -1302,80 +1418,89 @@ public class Alignment implements AlignmentI } /** - * Sets the codon frame mappings (replacing any existing mappings). + * Sets the codon frame mappings (replacing any existing mappings). Note the + * mappings are set on the dataset alignment instead if there is one. * * @see jalview.datamodel.AlignmentI#setCodonFrames() */ @Override - public void setCodonFrames(Set acfs) + public void setCodonFrames(List acfs) { - this.codonFrameList = acfs; + if (dataset != null) + { + dataset.setCodonFrames(acfs); + } + else + { + this.codonFrameList = acfs; + } } /** * Returns the set of codon frame mappings. Any changes to the returned set - * will affect the alignment. + * will affect the alignment. The mappings are held on (and read from) the + * dataset alignment if there is one. * * @see jalview.datamodel.AlignmentI#getCodonFrames() */ @Override - public Set getCodonFrames() + public List getCodonFrames() { - return codonFrameList; + // TODO: Fix this method to fix failing AlignedCodonFrame tests + // this behaviour is currently incorrect. method should return codon frames + // for just the alignment, + // selected from dataset + return dataset != null ? dataset.getCodonFrames() : codonFrameList; } - /* - * (non-Javadoc) - * - * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel. - * AlignedCodonFrame) + /** + * Removes the given mapping from the stored set. Note that the mappings are + * held on the dataset alignment if there is one. */ @Override public boolean removeCodonFrame(AlignedCodonFrame codons) { - if (codons == null || codonFrameList == null) + List acfs = getCodonFrames(); + if (codons == null || acfs == null) { return false; } - return codonFrameList.remove(codons); + return acfs.remove(codons); } @Override public void append(AlignmentI toappend) { - if (toappend == this) - { - System.err.println("Self append may cause a deadlock."); - } - // TODO test this method for a future 2.5 release + // TODO JAL-1270 needs test coverage // currently tested for use in jalview.gui.SequenceFetcher - boolean samegap = toappend.getGapCharacter() == getGapCharacter(); char oldc = toappend.getGapCharacter(); + boolean samegap = oldc == getGapCharacter(); boolean hashidden = toappend.getHiddenSequences() != null && toappend.getHiddenSequences().hiddenSequences != null; // get all sequences including any hidden ones - List sqs = (hashidden) ? toappend.getHiddenSequences() - .getFullAlignment().getSequences() : toappend.getSequences(); + List sqs = (hashidden) + ? toappend.getHiddenSequences().getFullAlignment() + .getSequences() + : toappend.getSequences(); if (sqs != null) { + // avoid self append deadlock by + List toappendsq = new ArrayList<>(); synchronized (sqs) { for (SequenceI addedsq : sqs) { if (!samegap) { - char[] oldseq = addedsq.getSequence(); - for (int c = 0; c < oldseq.length; c++) - { - if (oldseq[c] == oldc) - { - oldseq[c] = gapCharacter; - } - } + addedsq.replace(oldc, gapCharacter); } - addSequence(addedsq); + toappendsq.add(addedsq); } } + for (SequenceI addedsq : toappendsq) + { + addSequence(addedsq); + } } AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation(); for (int a = 0; alan != null && a < alan.length; a++) @@ -1383,7 +1508,8 @@ public class Alignment implements AlignmentI addAnnotation(alan[a]); } - this.codonFrameList.addAll(toappend.getCodonFrames()); + // use add method + getCodonFrames().addAll(toappend.getCodonFrames()); List sg = toappend.getGroups(); if (sg != null) @@ -1430,8 +1556,8 @@ public class Alignment implements AlignmentI if (ourval instanceof String) { // append strings - this.setProperty(k, ((String) ourval) + "; " - + ((String) toapprop)); + this.setProperty(k, + ((String) ourval) + "; " + ((String) toapprop)); } else { @@ -1462,7 +1588,6 @@ public class Alignment implements AlignmentI String calcId, boolean autoCalc, SequenceI seqRef, SequenceGroup groupRef) { - assert (name != null); if (annotations != null) { for (AlignmentAnnotation annot : getAlignmentAnnotation()) @@ -1479,7 +1604,10 @@ public class Alignment implements AlignmentI AlignmentAnnotation annot = new AlignmentAnnotation(name, name, new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH); annot.hasText = false; - annot.setCalcId(new String(calcId)); + if (calcId != null) + { + annot.setCalcId(new String(calcId)); + } annot.autoCalculated = autoCalc; if (seqRef != null) { @@ -1494,38 +1622,21 @@ public class Alignment implements AlignmentI @Override public Iterable findAnnotation(String calcId) { - ArrayList aa = new ArrayList(); - for (AlignmentAnnotation a : getAlignmentAnnotation()) + AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation(); + if (alignmentAnnotation != null) { - if (a.getCalcId() == calcId - || (a.getCalcId() != null && calcId != null && a.getCalcId() - .equals(calcId))) - { - aa.add(a); - } + return AlignmentAnnotation.findAnnotation( + Arrays.asList(getAlignmentAnnotation()), calcId); } - return aa; + return Arrays.asList(new AlignmentAnnotation[] {}); } - /** - * Returns an iterable collection of any annotations that match on given - * sequence ref, calcId and label (ignoring null values). - */ @Override public Iterable findAnnotations(SequenceI seq, String calcId, String label) { - ArrayList aa = new ArrayList(); - for (AlignmentAnnotation ann : getAlignmentAnnotation()) - { - if (ann.getCalcId() != null && ann.getCalcId().equals(calcId) - && ann.sequenceRef != null && ann.sequenceRef == seq - && ann.label != null && ann.label.equals(label)) - { - aa.add(ann); - } - } - return aa; + return AlignmentAnnotation.findAnnotations( + Arrays.asList(getAlignmentAnnotation()), seq, calcId, label); } @Override @@ -1595,6 +1706,7 @@ public class Alignment implements AlignmentI * * @return the representative sequence for this group */ + @Override public SequenceI getSeqrep() { return seqrep; @@ -1607,6 +1719,7 @@ public class Alignment implements AlignmentI * @param seqrep * the seqrep to set (null means no sequence representative) */ + @Override public void setSeqrep(SequenceI seqrep) { this.seqrep = seqrep; @@ -1616,6 +1729,7 @@ public class Alignment implements AlignmentI * * @return true if group has a sequence representative */ + @Override public boolean hasSeqrep() { return seqrep != null; @@ -1670,9 +1784,11 @@ public class Alignment implements AlignmentI * Parameters control whether gaps in exon (mapped) and intron (unmapped) * regions are preserved. Gaps that connect introns to exons are treated * conservatively, i.e. only preserved if both intron and exon gaps are - * preserved. + * preserved. TODO: check caveats below where the implementation fails * * @param al + * - must have same dataset, and sequences in al must have equivalent + * dataset sequence and start/end bounds under given mapping * @param preserveMappedGaps * if true, gaps within and between mapped codons are preserved * @param preserveUnmappedGaps @@ -1683,31 +1799,18 @@ public class Alignment implements AlignmentI boolean preserveUnmappedGaps) { // TODO should this method signature be the one in the interface? - int count = 0; + // JBPComment - yes - neither flag is used, so should be deleted. boolean thisIsNucleotide = this.isNucleotide(); boolean thatIsProtein = !al.isNucleotide(); if (!thatIsProtein && !thisIsNucleotide) { return AlignmentUtils.alignProteinAsDna(this, al); } - - char thisGapChar = this.getGapCharacter(); - String gap = thisIsNucleotide && thatIsProtein ? String - .valueOf(new char[] { thisGapChar, thisGapChar, thisGapChar }) - : String.valueOf(thisGapChar); - - // TODO handle intron regions? Needs a 'holistic' alignment of dna, - // not just sequence by sequence. But how to 'gap' intron regions? - - /* - * Get mappings from 'that' alignment's sequences to this. - */ - for (SequenceI alignTo : getSequences()) + else if (thatIsProtein && thisIsNucleotide) { - count += AlignmentUtils.alignSequenceAs(alignTo, al, gap, - preserveMappedGaps, preserveUnmappedGaps) ? 1 : 0; + return AlignmentUtils.alignCdsAsProtein(this, al); } - return count; + return AlignmentUtils.alignAs(this, al); } /** @@ -1717,7 +1820,7 @@ public class Alignment implements AlignmentI @Override public String toString() { - return new FastaFile().print(getSequencesArray()); + return new FastaFile().print(getSequencesArray(), true); } /** @@ -1726,7 +1829,7 @@ public class Alignment implements AlignmentI @Override public Set getSequenceNames() { - Set names = new HashSet(); + Set names = new HashSet<>(); for (SequenceI seq : getSequences()) { names.add(seq.getName()); @@ -1748,4 +1851,165 @@ public class Alignment implements AlignmentI } return hasValidSeq; } + + /** + * Update any mappings to 'virtual' sequences to compatible real ones, if + * present in the added sequences. Returns a count of mappings updated. + * + * @param seqs + * @return + */ + @Override + public int realiseMappings(List seqs) + { + int count = 0; + for (SequenceI seq : seqs) + { + for (AlignedCodonFrame mapping : getCodonFrames()) + { + count += mapping.realiseWith(seq); + } + } + return count; + } + + /** + * Returns the first AlignedCodonFrame that has a mapping between the given + * dataset sequences + * + * @param mapFrom + * @param mapTo + * @return + */ + @Override + public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo) + { + for (AlignedCodonFrame acf : getCodonFrames()) + { + if (acf.getAaForDnaSeq(mapFrom) == mapTo) + { + return acf; + } + } + return null; + } + + @Override + public void setHiddenColumns(HiddenColumns cols) + { + hiddenCols = cols; + } + + @Override + public void setupJPredAlignment() + { + SequenceI repseq = getSequenceAt(0); + setSeqrep(repseq); + HiddenColumns cs = new HiddenColumns(); + cs.hideList(repseq.getInsertions()); + setHiddenColumns(cs); + } + + @Override + public HiddenColumns propagateInsertions(SequenceI profileseq, + AlignmentView input) + { + int profsqpos = 0; + + char gc = getGapCharacter(); + Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc); + HiddenColumns nview = (HiddenColumns) alandhidden[1]; + SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos]; + return propagateInsertions(profileseq, origseq, nview); + } + + /** + * + * @param profileseq + * sequence in al which corresponds to origseq + * @param al + * alignment which is to have gaps inserted into it + * @param origseq + * sequence corresponding to profileseq which defines gap map for + * modifying al + */ + private HiddenColumns propagateInsertions(SequenceI profileseq, + SequenceI origseq, HiddenColumns hc) + { + // take the set of hidden columns, and the set of gaps in origseq, + // and remove all the hidden gaps from hiddenColumns + + // first get the gaps as a Bitset + // then calculate hidden ^ not(gap) + BitSet gaps = origseq.gapBitset(); + hc.andNot(gaps); + + // for each sequence in the alignment, except the profile sequence, + // insert gaps corresponding to each hidden region but where each hidden + // column region is shifted backwards by the number of preceding visible + // gaps update hidden columns at the same time + HiddenColumns newhidden = new HiddenColumns(); + + int numGapsBefore = 0; + int gapPosition = 0; + Iterator it = hc.iterator(); + while (it.hasNext()) + { + int[] region = it.next(); + + // get region coordinates accounting for gaps + // we can rely on gaps not being *in* hidden regions because we already + // removed those + while (gapPosition < region[0]) + { + gapPosition++; + if (gaps.get(gapPosition)) + { + numGapsBefore++; + } + } + + int left = region[0] - numGapsBefore; + int right = region[1] - numGapsBefore; + + newhidden.hideColumns(left, right); + padGaps(left, right, profileseq); + } + return newhidden; + } + + /** + * Pad gaps in all sequences in alignment except profileseq + * + * @param left + * position of first gap to insert + * @param right + * position of last gap to insert + * @param profileseq + * sequence not to pad + */ + private void padGaps(int left, int right, SequenceI profileseq) + { + char gc = getGapCharacter(); + + // make a string with number of gaps = length of hidden region + StringBuilder sb = new StringBuilder(); + for (int g = 0; g < right - left + 1; g++) + { + sb.append(gc); + } + + // loop over the sequences and pad with gaps where required + for (int s = 0, ns = getHeight(); s < ns; s++) + { + SequenceI sqobj = getSequenceAt(s); + if ((sqobj != profileseq) && (sqobj.getLength() >= left)) + { + String sq = sqobj.getSequenceAsString(); + sqobj.setSequence( + sq.substring(0, left) + sb.toString() + sq.substring(left)); + } + } + } + }