X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=90bdcae2ee318ae29b290bbb5ea1e5a145b2389a;hb=d2177decb24a93432ad55175b2123741bb3d8ac7;hp=3f9f03ff230b2d5d44282896a18bde9f4bd697b1;hpb=2acdd7fdaa575dd5238585ad86597f808b30d281;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 3f9f03f..90bdcae 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -21,14 +21,17 @@ package jalview.datamodel; import jalview.analysis.AlignmentUtils; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.io.FastaFile; +import jalview.util.Comparison; +import jalview.util.LinkedIdentityHashSet; import jalview.util.MessageManager; import java.util.ArrayList; +import java.util.Collections; import java.util.Enumeration; import java.util.HashSet; import java.util.Hashtable; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -43,12 +46,11 @@ import java.util.Vector; */ public class Alignment implements AlignmentI { - protected Alignment dataset; + private Alignment dataset; protected List sequences; - protected List groups = java.util.Collections - .synchronizedList(new ArrayList()); + protected List groups; protected char gapCharacter = '-'; @@ -60,20 +62,21 @@ public class Alignment implements AlignmentI public boolean hasRNAStructure = false; - /** DOCUMENT ME!! */ public AlignmentAnnotation[] annotations; - HiddenSequences hiddenSequences = new HiddenSequences(this); + HiddenSequences hiddenSequences; public Hashtable alignmentProperties; - private Set codonFrameList = new LinkedHashSet(); + private List codonFrameList; private void initAlignment(SequenceI[] seqs) { - int i = 0; + groups = Collections.synchronizedList(new ArrayList()); + hiddenSequences = new HiddenSequences(this); + codonFrameList = new ArrayList(); - if (jalview.util.Comparison.isNucleotide(seqs)) + if (Comparison.isNucleotide(seqs)) { type = NUCLEOTIDE; } @@ -82,10 +85,9 @@ public class Alignment implements AlignmentI type = PROTEIN; } - sequences = java.util.Collections - .synchronizedList(new ArrayList()); + sequences = Collections.synchronizedList(new ArrayList()); - for (i = 0; i < seqs.length; i++) + for (int i = 0; i < seqs.length; i++) { sequences.add(seqs[i]); } @@ -104,13 +106,15 @@ public class Alignment implements AlignmentI seqs[i] = new Sequence(seqs[i]); } + initAlignment(seqs); + /* - * Share the same dataset sequence mappings (if any). TODO: find a better - * place for these to live (alignment dataset?). + * Share the same dataset sequence mappings (if any). */ - this.setCodonFrames(al.getCodonFrames()); - - initAlignment(seqs); + if (dataset == null && al.getDataset() == null) + { + this.setCodonFrames(al.getCodonFrames()); + } } /** @@ -223,18 +227,21 @@ public class Alignment implements AlignmentI { if (dataset != null) { + // maintain dataset integrity - if (snew.getDatasetSequence() != null) - { - getDataset().addSequence(snew.getDatasetSequence()); - } - else + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { // derive new sequence SequenceI adding = snew.deriveSequence(); - getDataset().addSequence(adding.getDatasetSequence()); snew = adding; + dsseq = snew.getDatasetSequence(); + } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); } + } if (sequences == null) { @@ -253,18 +260,22 @@ public class Alignment implements AlignmentI } } - /** - * Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ @Override - public void setSequenceAt(int i, SequenceI snew) + public SequenceI replaceSequenceAt(int i, SequenceI snew) { synchronized (sequences) { - deleteSequence(i); - sequences.set(i, snew); + if (sequences.size() > i) + { + return sequences.set(i, snew); + + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); + } + return null; } } @@ -280,13 +291,23 @@ public class Alignment implements AlignmentI } @Override - public void finalize() + public void finalize() throws Throwable { if (getDataset() != null) { getDataset().removeAlignmentRef(); } + nullReferences(); + super.finalize(); + } + + /** + * Defensively nulls out references in case this object is not garbage + * collected + */ + void nullReferences() + { dataset = null; sequences = null; groups = null; @@ -295,14 +316,16 @@ public class Alignment implements AlignmentI } /** - * decrement the alignmentRefs counter by one and call finalize if it goes to - * zero. + * decrement the alignmentRefs counter by one and null references if it goes + * to zero. + * + * @throws Throwable */ - private void removeAlignmentRef() + private void removeAlignmentRef() throws Throwable { if (--alignmentRefs == 0) { - finalize(); + nullReferences(); } } @@ -343,17 +366,18 @@ public class Alignment implements AlignmentI * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI) */ @Override - public SequenceGroup findGroup(SequenceI s) + public SequenceGroup findGroup(SequenceI seq, int position) { synchronized (groups) { - for (int i = 0; i < this.groups.size(); i++) + for (SequenceGroup sg : groups) { - SequenceGroup sg = groups.get(i); - - if (sg.getSequences(null).contains(s)) + if (sg.getSequences(null).contains(seq)) { - return sg; + if (position >= sg.getStartRes() && position <= sg.getEndRes()) + { + return sg; + } } } } @@ -638,7 +662,7 @@ public class Alignment implements AlignmentI * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults) */ @Override - public int findIndex(SearchResults results) + public int findIndex(SearchResultsI results) { int i = 0; @@ -987,7 +1011,7 @@ public class Alignment implements AlignmentI } @Override - public void setDataset(Alignment data) + public void setDataset(AlignmentI data) { if (dataset == null && data == null) { @@ -995,7 +1019,12 @@ public class Alignment implements AlignmentI } else if (dataset == null && data != null) { - dataset = data; + if (!(data instanceof Alignment)) + { + throw new Error( + "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference"); + } + dataset = (Alignment) data; for (int i = 0; i < getHeight(); i++) { SequenceI currentSeq = getSequenceAt(i); @@ -1022,6 +1051,70 @@ public class Alignment implements AlignmentI } /** + * add dataset sequences to seq for currentSeq and any sequences it references + */ + private void resolveAndAddDatasetSeq(SequenceI currentSeq, + Set seqs, boolean createDatasetSequence) + { + SequenceI alignedSeq = currentSeq; + if (currentSeq.getDatasetSequence() != null) + { + currentSeq = currentSeq.getDatasetSequence(); + } + else + { + if (createDatasetSequence) + { + currentSeq = currentSeq.createDatasetSequence(); + } + } + if (seqs.contains(currentSeq)) + { + return; + } + List toProcess = new ArrayList(); + toProcess.add(currentSeq); + while (toProcess.size() > 0) + { + // use a queue ? + SequenceI curDs = toProcess.remove(0); + if (seqs.contains(curDs)) + { + continue; + } + seqs.add(curDs); + // iterate over database references, making sure we add forward referenced + // sequences + if (curDs.getDBRefs() != null) + { + for (DBRefEntry dbr : curDs.getDBRefs()) + { + if (dbr.getMap() != null && dbr.getMap().getTo() != null) + { + if (dbr.getMap().getTo() == alignedSeq) + { + /* + * update mapping to be to the newly created dataset sequence + */ + dbr.getMap().setTo(currentSeq); + } + if (dbr.getMap().getTo().getDatasetSequence() != null) + { + throw new Error( + "Implementation error: Map.getTo() for dbref " + dbr + + " from " + curDs.getName() + + " is not a dataset sequence."); + } + // we recurse to add all forward references to dataset sequences via + // DBRefs/etc + toProcess.add(dbr.getMap().getTo()); + } + } + } + } + } + + /** * Creates a new dataset for this alignment. Can only be done once - if * dataset is not null this will not be performed. */ @@ -1031,22 +1124,32 @@ public class Alignment implements AlignmentI { return; } - SequenceI[] seqs = new SequenceI[getHeight()]; - SequenceI currentSeq; + // try to avoid using SequenceI.equals at this stage, it will be expensive + Set seqs = new LinkedIdentityHashSet(); + for (int i = 0; i < getHeight(); i++) { - currentSeq = getSequenceAt(i); - if (currentSeq.getDatasetSequence() != null) - { - seqs[i] = currentSeq.getDatasetSequence(); - } - else + SequenceI currentSeq = getSequenceAt(i); + resolveAndAddDatasetSeq(currentSeq, seqs, true); + } + + // verify all mappings are in dataset + for (AlignedCodonFrame cf : codonFrameList) + { + for (SequenceToSequenceMapping ssm : cf.getMappings()) { - seqs[i] = currentSeq.createDatasetSequence(); + if (!seqs.contains(ssm.getFromSeq())) + { + resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false); + } + if (!seqs.contains(ssm.getMapping().getTo())) + { + resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false); + } } } - - dataset = new Alignment(seqs); + // finally construct dataset + dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); // move mappings to the dataset alignment dataset.codonFrameList = this.codonFrameList; this.codonFrameList = null; @@ -1281,8 +1384,8 @@ public class Alignment implements AlignmentI @Override public void addCodonFrame(AlignedCodonFrame codons) { - Set acfs = getCodonFrames(); - if (codons != null && acfs != null) + List acfs = getCodonFrames(); + if (codons != null && acfs != null && !acfs.contains(codons)) { acfs.add(codons); } @@ -1319,7 +1422,7 @@ public class Alignment implements AlignmentI * @see jalview.datamodel.AlignmentI#setCodonFrames() */ @Override - public void setCodonFrames(Set acfs) + public void setCodonFrames(List acfs) { if (dataset != null) { @@ -1339,8 +1442,12 @@ public class Alignment implements AlignmentI * @see jalview.datamodel.AlignmentI#getCodonFrames() */ @Override - public Set getCodonFrames() + public List getCodonFrames() { + // TODO: Fix this method to fix failing AlignedCodonFrame tests + // this behaviour is currently incorrect. method should return codon frames + // for just the alignment, + // selected from dataset return dataset != null ? dataset.getCodonFrames() : codonFrameList; } @@ -1351,7 +1458,7 @@ public class Alignment implements AlignmentI @Override public boolean removeCodonFrame(AlignedCodonFrame codons) { - Set acfs = getCodonFrames(); + List acfs = getCodonFrames(); if (codons == null || acfs == null) { return false; @@ -1362,11 +1469,7 @@ public class Alignment implements AlignmentI @Override public void append(AlignmentI toappend) { - if (toappend == this) - { - System.err.println("Self append may cause a deadlock."); - } - // TODO test this method for a future 2.5 release + // TODO JAL-1270 needs test coverage // currently tested for use in jalview.gui.SequenceFetcher boolean samegap = toappend.getGapCharacter() == getGapCharacter(); char oldc = toappend.getGapCharacter(); @@ -1377,6 +1480,8 @@ public class Alignment implements AlignmentI .getFullAlignment().getSequences() : toappend.getSequences(); if (sqs != null) { + // avoid self append deadlock by + List toappendsq = new ArrayList(); synchronized (sqs) { for (SequenceI addedsq : sqs) @@ -1392,9 +1497,13 @@ public class Alignment implements AlignmentI } } } - addSequence(addedsq); + toappendsq.add(addedsq); } } + for (SequenceI addedsq : toappendsq) + { + addSequence(addedsq); + } } AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation(); for (int a = 0; alan != null && a < alan.length; a++) @@ -1402,6 +1511,7 @@ public class Alignment implements AlignmentI addAnnotation(alan[a]); } + // use add method getCodonFrames().addAll(toappend.getCodonFrames()); List sg = toappend.getGroups(); @@ -1692,9 +1802,11 @@ public class Alignment implements AlignmentI * Parameters control whether gaps in exon (mapped) and intron (unmapped) * regions are preserved. Gaps that connect introns to exons are treated * conservatively, i.e. only preserved if both intron and exon gaps are - * preserved. + * preserved. TODO: check caveats below where the implementation fails * * @param al + * - must have same dataset, and sequences in al must have equivalent + * dataset sequence and start/end bounds under given mapping * @param preserveMappedGaps * if true, gaps within and between mapped codons are preserved * @param preserveUnmappedGaps @@ -1705,31 +1817,18 @@ public class Alignment implements AlignmentI boolean preserveUnmappedGaps) { // TODO should this method signature be the one in the interface? - int count = 0; + // JBPComment - yes - neither flag is used, so should be deleted. boolean thisIsNucleotide = this.isNucleotide(); boolean thatIsProtein = !al.isNucleotide(); if (!thatIsProtein && !thisIsNucleotide) { return AlignmentUtils.alignProteinAsDna(this, al); } - - char thisGapChar = this.getGapCharacter(); - String gap = thisIsNucleotide && thatIsProtein ? String - .valueOf(new char[] { thisGapChar, thisGapChar, thisGapChar }) - : String.valueOf(thisGapChar); - - // TODO handle intron regions? Needs a 'holistic' alignment of dna, - // not just sequence by sequence. But how to 'gap' intron regions? - - /* - * Get mappings from 'that' alignment's sequences to this. - */ - for (SequenceI alignTo : getSequences()) + else if (thatIsProtein && thisIsNucleotide) { - count += AlignmentUtils.alignSequenceAs(alignTo, al, gap, - preserveMappedGaps, preserveUnmappedGaps) ? 1 : 0; + return AlignmentUtils.alignCdsAsProtein(this, al); } - return count; + return AlignmentUtils.alignAs(this, al); } /** @@ -1739,7 +1838,7 @@ public class Alignment implements AlignmentI @Override public String toString() { - return new FastaFile().print(getSequencesArray()); + return new FastaFile().print(getSequencesArray(), true); } /** @@ -1770,4 +1869,82 @@ public class Alignment implements AlignmentI } return hasValidSeq; } + + /** + * Update any mappings to 'virtual' sequences to compatible real ones, if + * present in the added sequences. Returns a count of mappings updated. + * + * @param seqs + * @return + */ + @Override + public int realiseMappings(List seqs) + { + int count = 0; + for (SequenceI seq : seqs) + { + for (AlignedCodonFrame mapping : getCodonFrames()) + { + count += mapping.realiseWith(seq); + } + } + return count; + } + + /** + * Returns the first AlignedCodonFrame that has a mapping between the given + * dataset sequences + * + * @param mapFrom + * @param mapTo + * @return + */ + @Override + public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo) + { + for (AlignedCodonFrame acf : getCodonFrames()) + { + if (acf.getAaForDnaSeq(mapFrom) == mapTo) + { + return acf; + } + } + return null; + } + + @Override + public int[] getVisibleStartAndEndIndex(List hiddenCols) + { + int[] alignmentStartEnd = new int[] { 0, getWidth() - 1 }; + int startPos = alignmentStartEnd[0]; + int endPos = alignmentStartEnd[1]; + + int[] lowestRange = new int[] { -1, -1 }; + int[] higestRange = new int[] { -1, -1 }; + + for (int[] hiddenCol : hiddenCols) + { + lowestRange = (hiddenCol[0] <= startPos) ? hiddenCol : lowestRange; + higestRange = (hiddenCol[1] >= endPos) ? hiddenCol : higestRange; + } + + if (lowestRange[0] == -1 && lowestRange[1] == -1) + { + startPos = alignmentStartEnd[0]; + } + else + { + startPos = lowestRange[1] + 1; + } + + if (higestRange[0] == -1 && higestRange[1] == -1) + { + endPos = alignmentStartEnd[1]; + } + else + { + endPos = higestRange[0] - 1; + } + return new int[] { startPos, endPos }; + } }