X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=2289ac6314196fe08a62aa4797c8ef5897f0cc58;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=4057773aefccaff82cce16f2c15012d22debcce2;hpb=fd3e9637614543e5e6f039a0ad10dd5c59b6136f;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 4057773..2289ac6 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -20,13 +20,21 @@ */ package jalview.datamodel; +import jalview.analysis.AlignmentUtils; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; +import jalview.io.FastaFile; +import jalview.util.Comparison; +import jalview.util.LinkedIdentityHashSet; import jalview.util.MessageManager; import java.util.ArrayList; +import java.util.Collections; import java.util.Enumeration; +import java.util.HashSet; import java.util.Hashtable; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Vector; /** @@ -38,12 +46,11 @@ import java.util.Vector; */ public class Alignment implements AlignmentI { - protected Alignment dataset; + private Alignment dataset; protected List sequences; - protected List groups = java.util.Collections - .synchronizedList(new ArrayList()); + protected List groups; protected char gapCharacter = '-'; @@ -55,18 +62,21 @@ public class Alignment implements AlignmentI public boolean hasRNAStructure = false; - /** DOCUMENT ME!! */ public AlignmentAnnotation[] annotations; - HiddenSequences hiddenSequences = new HiddenSequences(this); + HiddenSequences hiddenSequences; public Hashtable alignmentProperties; + private List codonFrameList; + private void initAlignment(SequenceI[] seqs) { - int i = 0; + groups = Collections.synchronizedList(new ArrayList()); + hiddenSequences = new HiddenSequences(this); + codonFrameList = new ArrayList(); - if (jalview.util.Comparison.isNucleotide(seqs)) + if (Comparison.isNucleotide(seqs)) { type = NUCLEOTIDE; } @@ -75,10 +85,9 @@ public class Alignment implements AlignmentI type = PROTEIN; } - sequences = java.util.Collections - .synchronizedList(new ArrayList()); + sequences = Collections.synchronizedList(new ArrayList()); - for (i = 0; i < seqs.length; i++) + for (int i = 0; i < seqs.length; i++) { sequences.add(seqs[i]); } @@ -86,6 +95,29 @@ public class Alignment implements AlignmentI } /** + * Make a 'copy' alignment - sequences have new copies of features and + * annotations, but share the original dataset sequences. + */ + public Alignment(AlignmentI al) + { + SequenceI[] seqs = al.getSequencesArray(); + for (int i = 0; i < seqs.length; i++) + { + seqs[i] = new Sequence(seqs[i]); + } + + initAlignment(seqs); + + /* + * Share the same dataset sequence mappings (if any). + */ + if (dataset == null && al.getDataset() == null) + { + this.setCodonFrames(al.getCodonFrames()); + } + } + + /** * Make an alignment from an array of Sequences. * * @param sequences @@ -119,15 +151,12 @@ public class Alignment implements AlignmentI */ public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error(MessageManager.getString("error.alignment_cigararray_not_implemented")); + throw new Error( + MessageManager + .getString("error.alignment_cigararray_not_implemented")); // this(compactAlignment.refCigars); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ @Override public List getSequences() { @@ -157,6 +186,17 @@ public class Alignment implements AlignmentI } /** + * Returns a map of lists of sequences keyed by sequence name. + * + * @return + */ + @Override + public Map> getSequencesByName() + { + return AlignmentUtils.getSequencesByName(this); + } + + /** * DOCUMENT ME! * * @param i @@ -187,23 +227,25 @@ public class Alignment implements AlignmentI { if (dataset != null) { + // maintain dataset integrity - if (snew.getDatasetSequence() != null) - { - getDataset().addSequence(snew.getDatasetSequence()); - } - else + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { // derive new sequence SequenceI adding = snew.deriveSequence(); - getDataset().addSequence(adding.getDatasetSequence()); snew = adding; + dsseq = snew.getDatasetSequence(); + } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); } + } if (sequences == null) { - initAlignment(new SequenceI[] - { snew }); + initAlignment(new SequenceI[] { snew }); } else { @@ -218,19 +260,22 @@ public class Alignment implements AlignmentI } } - /** - * Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ @Override - public void setSequenceAt(int i, SequenceI snew) + public SequenceI replaceSequenceAt(int i, SequenceI snew) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(i); synchronized (sequences) { - sequences.set(i, snew); + if (sequences.size() > i) + { + return sequences.set(i, snew); + + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); + } + return null; } } @@ -246,13 +291,23 @@ public class Alignment implements AlignmentI } @Override - public void finalize() + public void finalize() throws Throwable { if (getDataset() != null) { getDataset().removeAlignmentRef(); } + nullReferences(); + super.finalize(); + } + + /** + * Defensively nulls out references in case this object is not garbage + * collected + */ + void nullReferences() + { dataset = null; sequences = null; groups = null; @@ -261,14 +316,16 @@ public class Alignment implements AlignmentI } /** - * decrement the alignmentRefs counter by one and call finalize if it goes to - * zero. + * decrement the alignmentRefs counter by one and null references if it goes + * to zero. + * + * @throws Throwable */ - private void removeAlignmentRef() + private void removeAlignmentRef() throws Throwable { if (--alignmentRefs == 0) { - finalize(); + nullReferences(); } } @@ -298,8 +355,8 @@ public class Alignment implements AlignmentI synchronized (sequences) { sequences.remove(i); + hiddenSequences.adjustHeightSequenceDeleted(i); } - hiddenSequences.adjustHeightSequenceDeleted(i); } } @@ -343,14 +400,14 @@ public class Alignment implements AlignmentI for (int i = 0; i < gSize; i++) { SequenceGroup sg = groups.get(i); - if (sg == null || sg.getSequences(null) == null) + if (sg == null || sg.getSequences() == null) { this.deleteGroup(sg); gSize--; continue; } - if (sg.getSequences(null).contains(s)) + if (sg.getSequences().contains(s)) { temp.add(sg); } @@ -720,6 +777,28 @@ public class Alignment implements AlignmentI return true; } + /** + * Delete all annotations, including auto-calculated if the flag is set true. + * Returns true if at least one annotation was deleted, else false. + * + * @param includingAutoCalculated + * @return + */ + @Override + public boolean deleteAllAnnotations(boolean includingAutoCalculated) + { + boolean result = false; + for (AlignmentAnnotation alan : getAlignmentAnnotation()) + { + if (!alan.autoCalculated || includingAutoCalculated) + { + deleteAnnotation(alan); + result = true; + } + } + return result; + } + /* * (non-Javadoc) * @@ -931,33 +1010,20 @@ public class Alignment implements AlignmentI } @Override - public void setDataset(Alignment data) + public void setDataset(AlignmentI data) { if (dataset == null && data == null) { - // Create a new dataset for this alignment. - // Can only be done once, if dataset is not null - // This will not be performed - SequenceI[] seqs = new SequenceI[getHeight()]; - SequenceI currentSeq; - for (int i = 0; i < getHeight(); i++) - { - currentSeq = getSequenceAt(i); - if (currentSeq.getDatasetSequence() != null) - { - seqs[i] = currentSeq.getDatasetSequence(); - } - else - { - seqs[i] = currentSeq.createDatasetSequence(); - } - } - - dataset = new Alignment(seqs); + createDatasetAlignment(); } else if (dataset == null && data != null) { - dataset = data; + if (!(data instanceof Alignment)) + { + throw new Error( + "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference"); + } + dataset = (Alignment) data; for (int i = 0; i < getHeight(); i++) { SequenceI currentSeq = getSequenceAt(i); @@ -984,6 +1050,111 @@ public class Alignment implements AlignmentI } /** + * add dataset sequences to seq for currentSeq and any sequences it references + */ + private void resolveAndAddDatasetSeq(SequenceI currentSeq, + Set seqs, boolean createDatasetSequence) + { + SequenceI alignedSeq = currentSeq; + if (currentSeq.getDatasetSequence() != null) + { + currentSeq = currentSeq.getDatasetSequence(); + } + else + { + if (createDatasetSequence) + { + currentSeq = currentSeq.createDatasetSequence(); + } + } + if (seqs.contains(currentSeq)) + { + return; + } + List toProcess = new ArrayList(); + toProcess.add(currentSeq); + while (toProcess.size() > 0) + { + // use a queue ? + SequenceI curDs = toProcess.remove(0); + if (seqs.contains(curDs)) + { + continue; + } + seqs.add(curDs); + // iterate over database references, making sure we add forward referenced + // sequences + if (curDs.getDBRefs() != null) + { + for (DBRefEntry dbr : curDs.getDBRefs()) + { + if (dbr.getMap() != null && dbr.getMap().getTo() != null) + { + if (dbr.getMap().getTo() == alignedSeq) + { + /* + * update mapping to be to the newly created dataset sequence + */ + dbr.getMap().setTo(currentSeq); + } + if (dbr.getMap().getTo().getDatasetSequence() != null) + { + throw new Error( + "Implementation error: Map.getTo() for dbref " + dbr + + " from " + curDs.getName() + + " is not a dataset sequence."); + } + // we recurse to add all forward references to dataset sequences via + // DBRefs/etc + toProcess.add(dbr.getMap().getTo()); + } + } + } + } + } + + /** + * Creates a new dataset for this alignment. Can only be done once - if + * dataset is not null this will not be performed. + */ + public void createDatasetAlignment() + { + if (dataset != null) + { + return; + } + // try to avoid using SequenceI.equals at this stage, it will be expensive + Set seqs = new LinkedIdentityHashSet(); + + for (int i = 0; i < getHeight(); i++) + { + SequenceI currentSeq = getSequenceAt(i); + resolveAndAddDatasetSeq(currentSeq, seqs, true); + } + + // verify all mappings are in dataset + for (AlignedCodonFrame cf : codonFrameList) + { + for (SequenceToSequenceMapping ssm : cf.getMappings()) + { + if (!seqs.contains(ssm.getFromSeq())) + { + resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false); + } + if (!seqs.contains(ssm.getMapping().getTo())) + { + resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false); + } + } + } + // finally construct dataset + dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); + // move mappings to the dataset alignment + dataset.codonFrameList = this.codonFrameList; + this.codonFrameList = null; + } + + /** * reference count for number of alignments referencing this one. */ int alignmentRefs = 0; @@ -1205,43 +1376,18 @@ public class Alignment implements AlignmentI return alignmentProperties; } - AlignedCodonFrame[] codonFrameList = null; - - /* - * (non-Javadoc) - * - * @see - * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame - * ) + /** + * Adds the given mapping to the stored set. Note this may be held on the + * dataset alignment. */ @Override public void addCodonFrame(AlignedCodonFrame codons) { - if (codons == null) + List acfs = getCodonFrames(); + if (codons != null && acfs != null && !acfs.contains(codons)) { - return; + acfs.add(codons); } - if (codonFrameList == null) - { - codonFrameList = new AlignedCodonFrame[] - { codons }; - return; - } - AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1]; - System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length); - t[codonFrameList.length] = codons; - codonFrameList = t; - } - - /* - * (non-Javadoc) - * - * @see jalview.datamodel.AlignmentI#getCodonFrame(int) - */ - @Override - public AlignedCodonFrame getCodonFrame(int index) - { - return codonFrameList[index]; } /* @@ -1251,83 +1397,78 @@ public class Alignment implements AlignmentI * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI) */ @Override - public AlignedCodonFrame[] getCodonFrame(SequenceI seq) + public List getCodonFrame(SequenceI seq) { - if (seq == null || codonFrameList == null) + if (seq == null) { return null; } - Vector cframes = new Vector(); - for (int f = 0; f < codonFrameList.length; f++) + List cframes = new ArrayList(); + for (AlignedCodonFrame acf : getCodonFrames()) { - if (codonFrameList[f].involvesSequence(seq)) + if (acf.involvesSequence(seq)) { - cframes.addElement(codonFrameList[f]); + cframes.add(acf); } } - if (cframes.size() == 0) + return cframes; + } + + /** + * Sets the codon frame mappings (replacing any existing mappings). Note the + * mappings are set on the dataset alignment instead if there is one. + * + * @see jalview.datamodel.AlignmentI#setCodonFrames() + */ + @Override + public void setCodonFrames(List acfs) + { + if (dataset != null) + { + dataset.setCodonFrames(acfs); + } + else { - return null; + this.codonFrameList = acfs; } - AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()]; - cframes.copyInto(cfr); - return cfr; } - /* - * (non-Javadoc) + /** + * Returns the set of codon frame mappings. Any changes to the returned set + * will affect the alignment. The mappings are held on (and read from) the + * dataset alignment if there is one. * * @see jalview.datamodel.AlignmentI#getCodonFrames() */ @Override - public AlignedCodonFrame[] getCodonFrames() + public List getCodonFrames() { - return codonFrameList; + // TODO: Fix this method to fix failing AlignedCodonFrame tests + // this behaviour is currently incorrect. method should return codon frames + // for just the alignment, + // selected from dataset + return dataset != null ? dataset.getCodonFrames() : codonFrameList; } - /* - * (non-Javadoc) - * - * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel. - * AlignedCodonFrame) + /** + * Removes the given mapping from the stored set. Note that the mappings are + * held on the dataset alignment if there is one. */ @Override public boolean removeCodonFrame(AlignedCodonFrame codons) { - if (codons == null || codonFrameList == null) + List acfs = getCodonFrames(); + if (codons == null || acfs == null) { return false; } - boolean removed = false; - int i = 0, iSize = codonFrameList.length; - while (i < iSize) - { - if (codonFrameList[i] == codons) - { - removed = true; - if (i + 1 < iSize) - { - System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize - - i - 1); - } - iSize--; - } - else - { - i++; - } - } - return removed; + return acfs.remove(codons); } @Override public void append(AlignmentI toappend) { - if (toappend == this) - { - System.err.println("Self append may cause a deadlock."); - } - // TODO test this method for a future 2.5 release + // TODO JAL-1270 needs test coverage // currently tested for use in jalview.gui.SequenceFetcher boolean samegap = toappend.getGapCharacter() == getGapCharacter(); char oldc = toappend.getGapCharacter(); @@ -1338,6 +1479,8 @@ public class Alignment implements AlignmentI .getFullAlignment().getSequences() : toappend.getSequences(); if (sqs != null) { + // avoid self append deadlock by + List toappendsq = new ArrayList(); synchronized (sqs) { for (SequenceI addedsq : sqs) @@ -1353,20 +1496,23 @@ public class Alignment implements AlignmentI } } } - addSequence(addedsq); + toappendsq.add(addedsq); } } + for (SequenceI addedsq : toappendsq) + { + addSequence(addedsq); + } } AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation(); for (int a = 0; alan != null && a < alan.length; a++) { addAnnotation(alan[a]); } - AlignedCodonFrame[] acod = toappend.getCodonFrames(); - for (int a = 0; acod != null && a < acod.length; a++) - { - this.addCodonFrame(acod[a]); - } + + // use add method + getCodonFrames().addAll(toappend.getCodonFrames()); + List sg = toappend.getGroups(); if (sg != null) { @@ -1489,6 +1635,27 @@ public class Alignment implements AlignmentI return aa; } + /** + * Returns an iterable collection of any annotations that match on given + * sequence ref, calcId and label (ignoring null values). + */ + @Override + public Iterable findAnnotations(SequenceI seq, + String calcId, String label) + { + ArrayList aa = new ArrayList(); + for (AlignmentAnnotation ann : getAlignmentAnnotation()) + { + if (ann.getCalcId() != null && ann.getCalcId().equals(calcId) + && ann.sequenceRef != null && ann.sequenceRef == seq + && ann.label != null && ann.label.equals(label)) + { + aa.add(ann); + } + } + return aa; + } + @Override public void moveSelectedSequencesByOne(SequenceGroup sg, Map map, boolean up) @@ -1550,6 +1717,41 @@ public class Alignment implements AlignmentI } } + private SequenceI seqrep = null; + + /** + * + * @return the representative sequence for this group + */ + @Override + public SequenceI getSeqrep() + { + return seqrep; + } + + /** + * set the representative sequence for this group. Note - this affects the + * interpretation of the Hidereps attribute. + * + * @param seqrep + * the seqrep to set (null means no sequence representative) + */ + @Override + public void setSeqrep(SequenceI seqrep) + { + this.seqrep = seqrep; + } + + /** + * + * @return true if group has a sequence representative + */ + @Override + public boolean hasSeqrep() + { + return seqrep != null; + } + @Override public int getEndRes() { @@ -1573,4 +1775,175 @@ public class Alignment implements AlignmentI { return dataset; } + + /** + * Align this alignment like the given (mapped) one. + */ + @Override + public int alignAs(AlignmentI al) + { + /* + * Currently retains unmapped gaps (in introns), regaps mapped regions + * (exons) + */ + return alignAs(al, false, true); + } + + /** + * Align this alignment 'the same as' the given one. Mapped sequences only are + * realigned. If both of the same type (nucleotide/protein) then align both + * identically. If this is nucleotide and the other is protein, make 3 gaps + * for each gap in the protein sequences. If this is protein and the other is + * nucleotide, insert a gap for each 3 gaps (or part thereof) between + * nucleotide bases. If this is protein and the other is nucleotide, gaps + * protein to match the relative ordering of codons in the nucleotide. + * + * Parameters control whether gaps in exon (mapped) and intron (unmapped) + * regions are preserved. Gaps that connect introns to exons are treated + * conservatively, i.e. only preserved if both intron and exon gaps are + * preserved. TODO: check caveats below where the implementation fails + * + * @param al + * - must have same dataset, and sequences in al must have equivalent + * dataset sequence and start/end bounds under given mapping + * @param preserveMappedGaps + * if true, gaps within and between mapped codons are preserved + * @param preserveUnmappedGaps + * if true, gaps within and between unmapped codons are preserved + */ + // @Override + public int alignAs(AlignmentI al, boolean preserveMappedGaps, + boolean preserveUnmappedGaps) + { + // TODO should this method signature be the one in the interface? + // JBPComment - yes - neither flag is used, so should be deleted. + boolean thisIsNucleotide = this.isNucleotide(); + boolean thatIsProtein = !al.isNucleotide(); + if (!thatIsProtein && !thisIsNucleotide) + { + return AlignmentUtils.alignProteinAsDna(this, al); + } + else if (thatIsProtein && thisIsNucleotide) + { + return AlignmentUtils.alignCdsAsProtein(this, al); + } + return AlignmentUtils.alignAs(this, al); + } + + /** + * Returns the alignment in Fasta format. Behaviour of this method is not + * guaranteed between versions. + */ + @Override + public String toString() + { + return new FastaFile().print(getSequencesArray()); + } + + /** + * Returns the set of distinct sequence names. No ordering is guaranteed. + */ + @Override + public Set getSequenceNames() + { + Set names = new HashSet(); + for (SequenceI seq : getSequences()) + { + names.add(seq.getName()); + } + return names; + } + + @Override + public boolean hasValidSequence() + { + boolean hasValidSeq = false; + for (SequenceI seq : getSequences()) + { + if ((seq.getEnd() - seq.getStart()) > 0) + { + hasValidSeq = true; + break; + } + } + return hasValidSeq; + } + + /** + * Update any mappings to 'virtual' sequences to compatible real ones, if + * present in the added sequences. Returns a count of mappings updated. + * + * @param seqs + * @return + */ + @Override + public int realiseMappings(List seqs) + { + int count = 0; + for (SequenceI seq : seqs) + { + for (AlignedCodonFrame mapping : getCodonFrames()) + { + count += mapping.realiseWith(seq); + } + } + return count; + } + + /** + * Returns the first AlignedCodonFrame that has a mapping between the given + * dataset sequences + * + * @param mapFrom + * @param mapTo + * @return + */ + @Override + public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo) + { + for (AlignedCodonFrame acf : getCodonFrames()) + { + if (acf.getAaForDnaSeq(mapFrom) == mapTo) + { + return acf; + } + } + return null; + } + + @Override + public int[] getVisibleStartAndEndIndex(List hiddenCols) + { + int[] alignmentStartEnd = new int[] { 0, getWidth() - 1 }; + int startPos = alignmentStartEnd[0]; + int endPos = alignmentStartEnd[1]; + + int[] lowestRange = new int[] { -1, -1 }; + int[] higestRange = new int[] { -1, -1 }; + + for (int[] hiddenCol : hiddenCols) + { + lowestRange = (hiddenCol[0] <= startPos) ? hiddenCol : lowestRange; + higestRange = (hiddenCol[1] >= endPos) ? hiddenCol : higestRange; + } + + if (lowestRange[0] == -1 && lowestRange[1] == -1) + { + startPos = alignmentStartEnd[0]; + } + else + { + startPos = lowestRange[1] + 1; + } + + if (higestRange[0] == -1 && higestRange[1] == -1) + { + endPos = alignmentStartEnd[1]; + } + else + { + endPos = higestRange[0] - 1; + } + return new int[] { startPos, endPos }; + } }