X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=3ba35b6bf749e8b202797a4bbfab30b6d4c74cf1;hb=aad3640b07f836362df7ea025fa09127a0a06145;hp=e9c0b4c84ea6591a6be4f99ac7037f729dceb8f8;hpb=eb481110f91c590e971499bbae5f6dd65f7f83ea;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index e9c0b4c..3ba35b6 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -24,13 +24,17 @@ import jalview.analysis.AlignmentUtils; import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.io.FastaFile; import jalview.util.Comparison; +import jalview.util.LinkedIdentityHashSet; import jalview.util.MessageManager; import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.Enumeration; import java.util.HashSet; import java.util.Hashtable; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -47,17 +51,13 @@ public class Alignment implements AlignmentI { private Alignment dataset; - protected List sequences; + private List sequences; protected List groups; protected char gapCharacter = '-'; - protected int type = NUCLEOTIDE; - - public static final int PROTEIN = 0; - - public static final int NUCLEOTIDE = 1; + private boolean nucleotide = true; public boolean hasRNAStructure = false; @@ -65,6 +65,8 @@ public class Alignment implements AlignmentI HiddenSequences hiddenSequences; + HiddenColumns hiddenCols; + public Hashtable alignmentProperties; private List codonFrameList; @@ -73,16 +75,10 @@ public class Alignment implements AlignmentI { groups = Collections.synchronizedList(new ArrayList()); hiddenSequences = new HiddenSequences(this); - codonFrameList = new ArrayList(); + hiddenCols = new HiddenColumns(); + codonFrameList = new ArrayList<>(); - if (Comparison.isNucleotide(seqs)) - { - type = NUCLEOTIDE; - } - else - { - type = PROTEIN; - } + nucleotide = Comparison.isNucleotide(seqs); sequences = Collections.synchronizedList(new ArrayList()); @@ -135,7 +131,7 @@ public class Alignment implements AlignmentI public Alignment(SeqCigar[] alseqs) { SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, - gapCharacter, new ColumnSelection(), null); + gapCharacter, new HiddenColumns(), null); initAlignment(seqs); } @@ -150,9 +146,8 @@ public class Alignment implements AlignmentI */ public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error( - MessageManager - .getString("error.alignment_cigararray_not_implemented")); + throw new Error(MessageManager + .getString("error.alignment_cigararray_not_implemented")); // this(compactAlignment.refCigars); } @@ -195,14 +190,6 @@ public class Alignment implements AlignmentI return AlignmentUtils.getSequencesByName(this); } - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ @Override public SequenceI getSequenceAt(int i) { @@ -213,11 +200,36 @@ public class Alignment implements AlignmentI return sequences.get(i); } } + return null; } + @Override + public SequenceI getSequenceAtAbsoluteIndex(int i) + { + SequenceI seq = null; + if (getHiddenSequences().getSize() > 0) + { + seq = getHiddenSequences().getHiddenSequence(i); + if (seq == null) + { + // didn't find the sequence in the hidden sequences, get it from the + // alignment + int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i); + seq = getSequenceAt(index); + } + } + else + { + seq = getSequenceAt(i); + } + return seq; + } + /** - * Adds a sequence to the alignment. Recalculates maxLength and size. + * Adds a sequence to the alignment. Recalculates maxLength and size. Note + * this currently does not recalculate whether or not the alignment is + * nucleotide, so mixed alignments may have undefined behaviour. * * @param snew */ @@ -226,18 +238,21 @@ public class Alignment implements AlignmentI { if (dataset != null) { + // maintain dataset integrity - if (snew.getDatasetSequence() != null) - { - getDataset().addSequence(snew.getDatasetSequence()); - } - else + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { // derive new sequence SequenceI adding = snew.deriveSequence(); - getDataset().addSequence(adding.getDatasetSequence()); snew = adding; + dsseq = snew.getDatasetSequence(); + } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); } + } if (sequences == null) { @@ -256,18 +271,22 @@ public class Alignment implements AlignmentI } } - /** - * Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ @Override - public void setSequenceAt(int i, SequenceI snew) + public SequenceI replaceSequenceAt(int i, SequenceI snew) { synchronized (sequences) { - deleteSequence(i); - sequences.set(i, snew); + if (sequences.size() > i) + { + return sequences.set(i, snew); + + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); + } + return null; } } @@ -283,13 +302,23 @@ public class Alignment implements AlignmentI } @Override - public void finalize() + public void finalize() throws Throwable { if (getDataset() != null) { getDataset().removeAlignmentRef(); } + nullReferences(); + super.finalize(); + } + + /** + * Defensively nulls out references in case this object is not garbage + * collected + */ + void nullReferences() + { dataset = null; sequences = null; groups = null; @@ -298,41 +327,34 @@ public class Alignment implements AlignmentI } /** - * decrement the alignmentRefs counter by one and call finalize if it goes to - * zero. + * decrement the alignmentRefs counter by one and null references if it goes + * to zero. + * + * @throws Throwable */ - private void removeAlignmentRef() + private void removeAlignmentRef() throws Throwable { if (--alignmentRefs == 0) { - finalize(); + nullReferences(); } } - /** - * DOCUMENT ME! - * - * @param s - * DOCUMENT ME! - */ @Override public void deleteSequence(SequenceI s) { - deleteSequence(findIndex(s)); + synchronized (sequences) + { + deleteSequence(findIndex(s)); + } } - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - */ @Override public void deleteSequence(int i) { - if (i > -1 && i < getHeight()) + synchronized (sequences) { - synchronized (sequences) + if (i > -1 && i < getHeight()) { sequences.remove(i); hiddenSequences.adjustHeightSequenceDeleted(i); @@ -340,23 +362,36 @@ public class Alignment implements AlignmentI } } + @Override + public void deleteHiddenSequence(int i) + { + synchronized (sequences) + { + if (i > -1 && i < getHeight()) + { + sequences.remove(i); + } + } + } + /* * (non-Javadoc) * * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI) */ @Override - public SequenceGroup findGroup(SequenceI s) + public SequenceGroup findGroup(SequenceI seq, int position) { synchronized (groups) { - for (int i = 0; i < this.groups.size(); i++) + for (SequenceGroup sg : groups) { - SequenceGroup sg = groups.get(i); - - if (sg.getSequences(null).contains(s)) + if (sg.getSequences(null).contains(seq)) { - return sg; + if (position >= sg.getStartRes() && position <= sg.getEndRes()) + { + return sg; + } } } } @@ -372,7 +407,7 @@ public class Alignment implements AlignmentI @Override public SequenceGroup[] findAllGroups(SequenceI s) { - ArrayList temp = new ArrayList(); + ArrayList temp = new ArrayList<>(); synchronized (groups) { @@ -423,7 +458,7 @@ public class Alignment implements AlignmentI return; } } - sg.setContext(this); + sg.setContext(this, true); groups.add(sg); } } @@ -442,7 +477,9 @@ public class Alignment implements AlignmentI return; } // remove annotation very quickly - AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length]; + AlignmentAnnotation[] t, + todelete = new AlignmentAnnotation[annotations.length], + tokeep = new AlignmentAnnotation[annotations.length]; int i, p, k; if (gp == null) { @@ -500,7 +537,7 @@ public class Alignment implements AlignmentI } for (SequenceGroup sg : groups) { - sg.setContext(null); + sg.setContext(null, false); } groups.clear(); } @@ -516,7 +553,7 @@ public class Alignment implements AlignmentI { removeAnnotationForGroup(g); groups.remove(g); - g.setContext(null); + g.setContext(null, false); } } } @@ -575,7 +612,7 @@ public class Alignment implements AlignmentI sqname = sq.getName(); if (sqname.equals(token) // exact match || (b && // allow imperfect matches - case varies - (sqname.equalsIgnoreCase(token)))) + (sqname.equalsIgnoreCase(token)))) { return getSequenceAt(i); } @@ -641,7 +678,7 @@ public class Alignment implements AlignmentI * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults) */ @Override - public int findIndex(SearchResults results) + public int findIndex(SearchResultsI results) { int i = 0; @@ -656,27 +693,23 @@ public class Alignment implements AlignmentI return -1; } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ @Override public int getHeight() { return sequences.size(); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ + @Override + public int getAbsoluteHeight() + { + return sequences.size() + getHiddenSequences().getSize(); + } + @Override public int getWidth() { int maxLength = -1; - + for (int i = 0; i < sequences.size(); i++) { if (getSequenceAt(i).getLength() > maxLength) @@ -684,9 +717,34 @@ public class Alignment implements AlignmentI maxLength = getSequenceAt(i).getLength(); } } - + return maxLength; } + /* + @Override + public int getWidth() + { + final Wrapper temp = new Wrapper(); + + forEachSequence(new Consumer() + { + @Override + public void accept(SequenceI s) + { + if (s.getLength() > temp.inner) + { + temp.inner = s.getLength(); + } + } + }, 0, sequences.size() - 1); + + return temp.inner; + } + + public static class Wrapper + { + public int inner; + }*/ /** * DOCUMENT ME! @@ -757,6 +815,12 @@ public class Alignment implements AlignmentI return true; } + @Override + public boolean isHidden(int alignmentIndex) + { + return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null); + } + /** * Delete all annotations, including auto-calculated if the flag is set true. * Returns true if at least one annotation was deleted, else false. @@ -957,29 +1021,9 @@ public class Alignment implements AlignmentI } @Override - public void setNucleotide(boolean b) - { - if (b) - { - type = NUCLEOTIDE; - } - else - { - type = PROTEIN; - } - } - - @Override public boolean isNucleotide() { - if (type == NUCLEOTIDE) - { - return true; - } - else - { - return false; - } + return nucleotide; } @Override @@ -998,6 +1042,10 @@ public class Alignment implements AlignmentI } else if (dataset == null && data != null) { + if (data == this) + { + throw new IllegalArgumentException("Circular dataset reference"); + } if (!(data instanceof Alignment)) { throw new Error( @@ -1035,6 +1083,7 @@ public class Alignment implements AlignmentI private void resolveAndAddDatasetSeq(SequenceI currentSeq, Set seqs, boolean createDatasetSequence) { + SequenceI alignedSeq = currentSeq; if (currentSeq.getDatasetSequence() != null) { currentSeq = currentSeq.getDatasetSequence(); @@ -1046,21 +1095,18 @@ public class Alignment implements AlignmentI currentSeq = currentSeq.createDatasetSequence(); } } - if (seqs.contains(currentSeq)) - { - return; - } - List toProcess = new ArrayList(); + + List toProcess = new ArrayList<>(); toProcess.add(currentSeq); while (toProcess.size() > 0) { // use a queue ? SequenceI curDs = toProcess.remove(0); - if (seqs.contains(curDs)) + + if (!seqs.add(curDs)) { continue; } - seqs.add(curDs); // iterate over database references, making sure we add forward referenced // sequences if (curDs.getDBRefs() != null) @@ -1069,12 +1115,18 @@ public class Alignment implements AlignmentI { if (dbr.getMap() != null && dbr.getMap().getTo() != null) { + if (dbr.getMap().getTo() == alignedSeq) + { + /* + * update mapping to be to the newly created dataset sequence + */ + dbr.getMap().setTo(currentSeq); + } if (dbr.getMap().getTo().getDatasetSequence() != null) { - throw new Error("Implementation error: Map.getTo() for dbref" - + dbr + " is not a dataset sequence."); - // TODO: if this happens, could also rewrite the reference to - // point to new dataset sequence + throw new Error("Implementation error: Map.getTo() for dbref " + + dbr + " from " + curDs.getName() + + " is not a dataset sequence."); } // we recurse to add all forward references to dataset sequences via // DBRefs/etc @@ -1095,8 +1147,8 @@ public class Alignment implements AlignmentI { return; } - // try to avoid using equals at this stage, it will be expensive - Set seqs = new jalview.util.LinkedIdentityHashSet(); + // try to avoid using SequenceI.equals at this stage, it will be expensive + Set seqs = new LinkedIdentityHashSet<>(); for (int i = 0; i < getHeight(); i++) { @@ -1211,8 +1263,8 @@ public class Alignment implements AlignmentI current = getSequenceAt(i); // This should really be a sequence method ends[i * 2] = current.findIndex(current.getStart()); - ends[i * 2 + 1] = current.findIndex(current.getStart() - + current.getLength()); + ends[i * 2 + 1] = current + .findIndex(current.getStart() + current.getLength()); boolean hitres = false; for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++) { @@ -1302,6 +1354,12 @@ public class Alignment implements AlignmentI } @Override + public HiddenColumns getHiddenColumns() + { + return hiddenCols; + } + + @Override public CigarArray getCompactAlignment() { synchronized (sequences) @@ -1375,7 +1433,7 @@ public class Alignment implements AlignmentI { return null; } - List cframes = new ArrayList(); + List cframes = new ArrayList<>(); for (AlignedCodonFrame acf : getCodonFrames()) { if (acf.involvesSequence(seq)) @@ -1442,31 +1500,26 @@ public class Alignment implements AlignmentI { // TODO JAL-1270 needs test coverage // currently tested for use in jalview.gui.SequenceFetcher - boolean samegap = toappend.getGapCharacter() == getGapCharacter(); char oldc = toappend.getGapCharacter(); + boolean samegap = oldc == getGapCharacter(); boolean hashidden = toappend.getHiddenSequences() != null && toappend.getHiddenSequences().hiddenSequences != null; // get all sequences including any hidden ones - List sqs = (hashidden) ? toappend.getHiddenSequences() - .getFullAlignment().getSequences() : toappend.getSequences(); + List sqs = (hashidden) + ? toappend.getHiddenSequences().getFullAlignment() + .getSequences() + : toappend.getSequences(); if (sqs != null) { // avoid self append deadlock by - List toappendsq = new ArrayList(); + List toappendsq = new ArrayList<>(); synchronized (sqs) { for (SequenceI addedsq : sqs) { if (!samegap) { - char[] oldseq = addedsq.getSequence(); - for (int c = 0; c < oldseq.length; c++) - { - if (oldseq[c] == oldc) - { - oldseq[c] = gapCharacter; - } - } + addedsq.replace(oldc, gapCharacter); } toappendsq.add(addedsq); } @@ -1530,8 +1583,8 @@ public class Alignment implements AlignmentI if (ourval instanceof String) { // append strings - this.setProperty(k, ((String) ourval) + "; " - + ((String) toapprop)); + this.setProperty(k, + ((String) ourval) + "; " + ((String) toapprop)); } else { @@ -1562,7 +1615,6 @@ public class Alignment implements AlignmentI String calcId, boolean autoCalc, SequenceI seqRef, SequenceGroup groupRef) { - assert (name != null); if (annotations != null) { for (AlignmentAnnotation annot : getAlignmentAnnotation()) @@ -1579,7 +1631,10 @@ public class Alignment implements AlignmentI AlignmentAnnotation annot = new AlignmentAnnotation(name, name, new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH); annot.hasText = false; - annot.setCalcId(new String(calcId)); + if (calcId != null) + { + annot.setCalcId(new String(calcId)); + } annot.autoCalculated = autoCalc; if (seqRef != null) { @@ -1594,38 +1649,21 @@ public class Alignment implements AlignmentI @Override public Iterable findAnnotation(String calcId) { - ArrayList aa = new ArrayList(); - for (AlignmentAnnotation a : getAlignmentAnnotation()) + AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation(); + if (alignmentAnnotation != null) { - if (a.getCalcId() == calcId - || (a.getCalcId() != null && calcId != null && a.getCalcId() - .equals(calcId))) - { - aa.add(a); - } + return AlignmentAnnotation.findAnnotation( + Arrays.asList(getAlignmentAnnotation()), calcId); } - return aa; + return Arrays.asList(new AlignmentAnnotation[] {}); } - /** - * Returns an iterable collection of any annotations that match on given - * sequence ref, calcId and label (ignoring null values). - */ @Override public Iterable findAnnotations(SequenceI seq, String calcId, String label) { - ArrayList aa = new ArrayList(); - for (AlignmentAnnotation ann : getAlignmentAnnotation()) - { - if (ann.getCalcId() != null && ann.getCalcId().equals(calcId) - && ann.sequenceRef != null && ann.sequenceRef == seq - && ann.label != null && ann.label.equals(label)) - { - aa.add(ann); - } - } - return aa; + return AlignmentAnnotation.findAnnotations( + Arrays.asList(getAlignmentAnnotation()), seq, calcId, label); } @Override @@ -1809,7 +1847,7 @@ public class Alignment implements AlignmentI @Override public String toString() { - return new FastaFile().print(getSequencesArray()); + return new FastaFile().print(getSequencesArray(), true); } /** @@ -1818,7 +1856,7 @@ public class Alignment implements AlignmentI @Override public Set getSequenceNames() { - Set names = new HashSet(); + Set names = new HashSet<>(); for (SequenceI seq : getSequences()) { names.add(seq.getName()); @@ -1884,38 +1922,121 @@ public class Alignment implements AlignmentI } @Override - public int[] getVisibleStartAndEndIndex(List hiddenCols) + public void setHiddenColumns(HiddenColumns cols) { - int[] alignmentStartEnd = new int[] { 0, getWidth() - 1 }; - int startPos = alignmentStartEnd[0]; - int endPos = alignmentStartEnd[1]; + hiddenCols = cols; + } - int[] lowestRange = new int[] { -1, -1 }; - int[] higestRange = new int[] { -1, -1 }; + @Override + public void setupJPredAlignment() + { + SequenceI repseq = getSequenceAt(0); + setSeqrep(repseq); + HiddenColumns cs = new HiddenColumns(); + cs.hideList(repseq.getInsertions()); + setHiddenColumns(cs); + } - for (int[] hiddenCol : hiddenCols) - { - lowestRange = (hiddenCol[0] <= startPos) ? hiddenCol : lowestRange; - higestRange = (hiddenCol[1] >= endPos) ? hiddenCol : higestRange; - } + @Override + public HiddenColumns propagateInsertions(SequenceI profileseq, + AlignmentView input) + { + int profsqpos = 0; - if (lowestRange[0] == -1 && lowestRange[1] == -1) - { - startPos = alignmentStartEnd[0]; - } - else - { - startPos = lowestRange[1] + 1; + char gc = getGapCharacter(); + Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc); + HiddenColumns nview = (HiddenColumns) alandhidden[1]; + SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos]; + return propagateInsertions(profileseq, origseq, nview); + } + + /** + * + * @param profileseq + * sequence in al which corresponds to origseq + * @param al + * alignment which is to have gaps inserted into it + * @param origseq + * sequence corresponding to profileseq which defines gap map for + * modifying al + */ + private HiddenColumns propagateInsertions(SequenceI profileseq, + SequenceI origseq, HiddenColumns hc) + { + // take the set of hidden columns, and the set of gaps in origseq, + // and remove all the hidden gaps from hiddenColumns + + // first get the gaps as a Bitset + // then calculate hidden ^ not(gap) + BitSet gaps = origseq.gapBitset(); + hc.andNot(gaps); + + // for each sequence in the alignment, except the profile sequence, + // insert gaps corresponding to each hidden region but where each hidden + // column region is shifted backwards by the number of preceding visible + // gaps update hidden columns at the same time + HiddenColumns newhidden = new HiddenColumns(); + + int numGapsBefore = 0; + int gapPosition = 0; + Iterator it = hc.iterator(); + while (it.hasNext()) + { + int[] region = it.next(); + + // get region coordinates accounting for gaps + // we can rely on gaps not being *in* hidden regions because we already + // removed those + while (gapPosition < region[0]) + { + gapPosition++; + if (gaps.get(gapPosition)) + { + numGapsBefore++; + } + } + + int left = region[0] - numGapsBefore; + int right = region[1] - numGapsBefore; + + newhidden.hideColumns(left, right); + padGaps(left, right, profileseq); } + return newhidden; + } - if (higestRange[0] == -1 && higestRange[1] == -1) + /** + * Pad gaps in all sequences in alignment except profileseq + * + * @param left + * position of first gap to insert + * @param right + * position of last gap to insert + * @param profileseq + * sequence not to pad + */ + private void padGaps(int left, int right, SequenceI profileseq) + { + char gc = getGapCharacter(); + + // make a string with number of gaps = length of hidden region + StringBuilder sb = new StringBuilder(); + for (int g = 0; g < right - left + 1; g++) { - endPos = alignmentStartEnd[1]; + sb.append(gc); } - else + + // loop over the sequences and pad with gaps where required + for (int s = 0, ns = getHeight(); s < ns; s++) { - endPos = higestRange[0] - 1; + SequenceI sqobj = getSequenceAt(s); + if ((sqobj != profileseq) && (sqobj.getLength() >= left)) + { + String sq = sqobj.getSequenceAsString(); + sqobj.setSequence( + sq.substring(0, left) + sb.toString() + sq.substring(left)); + } } - return new int[] { startPos, endPos }; } + }