X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=3ba35b6bf749e8b202797a4bbfab30b6d4c74cf1;hb=aad3640b07f836362df7ea025fa09127a0a06145;hp=708e566ad6fe87389af2ed9a370514df934fe852;hpb=a79f9e113c51c032070c670e45ce3eb464691166;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 708e566..3ba35b6 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,25 +1,44 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel; -import java.util.*; - -import jalview.analysis.*; +import jalview.analysis.AlignmentUtils; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; +import jalview.io.FastaFile; +import jalview.util.Comparison; +import jalview.util.LinkedIdentityHashSet; +import jalview.util.MessageManager; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Vector; /** * Data structure to hold and manipulate a multiple sequence alignment @@ -30,49 +49,67 @@ import jalview.analysis.*; */ public class Alignment implements AlignmentI { - protected Alignment dataset; + private Alignment dataset; - protected Vector sequences; + private List sequences; - protected Vector groups = new Vector(); + protected List groups; protected char gapCharacter = '-'; - protected int type = NUCLEOTIDE; + private boolean nucleotide = true; - public static final int PROTEIN = 0; - - public static final int NUCLEOTIDE = 1; - public boolean hasRNAStructure = false; - /** DOCUMENT ME!! */ public AlignmentAnnotation[] annotations; - HiddenSequences hiddenSequences = new HiddenSequences(this); + HiddenSequences hiddenSequences; + + HiddenColumns hiddenCols; public Hashtable alignmentProperties; + private List codonFrameList; + private void initAlignment(SequenceI[] seqs) { - int i = 0; + groups = Collections.synchronizedList(new ArrayList()); + hiddenSequences = new HiddenSequences(this); + hiddenCols = new HiddenColumns(); + codonFrameList = new ArrayList<>(); + + nucleotide = Comparison.isNucleotide(seqs); - if (jalview.util.Comparison.isNucleotide(seqs)) + sequences = Collections.synchronizedList(new ArrayList()); + + for (int i = 0; i < seqs.length; i++) { - type = NUCLEOTIDE; + sequences.add(seqs[i]); } - else + + } + + /** + * Make a 'copy' alignment - sequences have new copies of features and + * annotations, but share the original dataset sequences. + */ + public Alignment(AlignmentI al) + { + SequenceI[] seqs = al.getSequencesArray(); + for (int i = 0; i < seqs.length; i++) { - type = PROTEIN; + seqs[i] = new Sequence(seqs[i]); } - sequences = new Vector(); + initAlignment(seqs); - for (i = 0; i < seqs.length; i++) + /* + * Share the same dataset sequence mappings (if any). + */ + if (dataset == null && al.getDataset() == null) { - sequences.addElement(seqs[i]); + this.setCodonFrames(al.getCodonFrames()); } - } /** @@ -94,7 +131,7 @@ public class Alignment implements AlignmentI public Alignment(SeqCigar[] alseqs) { SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, - gapCharacter, new ColumnSelection(), null); + gapCharacter, new HiddenColumns(), null); initAlignment(seqs); } @@ -109,96 +146,148 @@ public class Alignment implements AlignmentI */ public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error("Alignment(CigarArray) not yet implemented"); + throw new Error(MessageManager + .getString("error.alignment_cigararray_not_implemented")); // this(compactAlignment.refCigars); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getSequences() + @Override + public List getSequences() { return sequences; } + @Override + public List getSequences( + Map hiddenReps) + { + // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to + // work on this. + return sequences; + } + + @Override public SequenceI[] getSequencesArray() { if (sequences == null) + { return null; - SequenceI[] reply = new SequenceI[sequences.size()]; - for (int i = 0; i < sequences.size(); i++) + } + synchronized (sequences) { - reply[i] = (SequenceI) sequences.elementAt(i); + return sequences.toArray(new SequenceI[sequences.size()]); } - return reply; } /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! + * Returns a map of lists of sequences keyed by sequence name. * - * @return DOCUMENT ME! + * @return */ + @Override + public Map> getSequencesByName() + { + return AlignmentUtils.getSequencesByName(this); + } + + @Override public SequenceI getSequenceAt(int i) { - if (i>-1 && i < sequences.size()) + synchronized (sequences) { - return (SequenceI) sequences.elementAt(i); + if (i > -1 && i < sequences.size()) + { + return sequences.get(i); + } } return null; } + @Override + public SequenceI getSequenceAtAbsoluteIndex(int i) + { + SequenceI seq = null; + if (getHiddenSequences().getSize() > 0) + { + seq = getHiddenSequences().getHiddenSequence(i); + if (seq == null) + { + // didn't find the sequence in the hidden sequences, get it from the + // alignment + int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i); + seq = getSequenceAt(index); + } + } + else + { + seq = getSequenceAt(i); + } + return seq; + } + /** - * Adds a sequence to the alignment. Recalculates maxLength and size. + * Adds a sequence to the alignment. Recalculates maxLength and size. Note + * this currently does not recalculate whether or not the alignment is + * nucleotide, so mixed alignments may have undefined behaviour. * * @param snew */ + @Override public void addSequence(SequenceI snew) { if (dataset != null) { + // maintain dataset integrity - if (snew.getDatasetSequence() != null) - { - getDataset().addSequence(snew.getDatasetSequence()); - } - else + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { // derive new sequence SequenceI adding = snew.deriveSequence(); - getDataset().addSequence(adding.getDatasetSequence()); snew = adding; + dsseq = snew.getDatasetSequence(); } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); + } + } if (sequences == null) { - initAlignment(new SequenceI[] - { snew }); + initAlignment(new SequenceI[] { snew }); } else { - sequences.addElement(snew); + synchronized (sequences) + { + sequences.add(snew); + } } if (hiddenSequences != null) + { hiddenSequences.adjustHeightSequenceAdded(); + } } - /** - * Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void setSequenceAt(int i, SequenceI snew) + @Override + public SequenceI replaceSequenceAt(int i, SequenceI snew) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(oldseq); + synchronized (sequences) + { + if (sequences.size() > i) + { + return sequences.set(i, snew); - sequences.setElementAt(snew, i); + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); + } + return null; + } } /** @@ -206,16 +295,30 @@ public class Alignment implements AlignmentI * * @return DOCUMENT ME! */ - public Vector getGroups() + @Override + public List getGroups() { return groups; } - public void finalize() + @Override + public void finalize() throws Throwable { if (getDataset() != null) + { getDataset().removeAlignmentRef(); + } + nullReferences(); + super.finalize(); + } + + /** + * Defensively nulls out references in case this object is not garbage + * collected + */ + void nullReferences() + { dataset = null; sequences = null; groups = null; @@ -224,123 +327,140 @@ public class Alignment implements AlignmentI } /** - * decrement the alignmentRefs counter by one and call finalize if it goes to - * zero. + * decrement the alignmentRefs counter by one and null references if it goes + * to zero. + * + * @throws Throwable */ - private void removeAlignmentRef() + private void removeAlignmentRef() throws Throwable { if (--alignmentRefs == 0) { - finalize(); + nullReferences(); } } - /** - * DOCUMENT ME! - * - * @param s - * DOCUMENT ME! - */ + @Override public void deleteSequence(SequenceI s) { - deleteSequence(findIndex(s)); + synchronized (sequences) + { + deleteSequence(findIndex(s)); + } } - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - */ + @Override public void deleteSequence(int i) { - if (i > -1 && i < getHeight()) + synchronized (sequences) { - sequences.removeElementAt(i); - hiddenSequences.adjustHeightSequenceDeleted(i); + if (i > -1 && i < getHeight()) + { + sequences.remove(i); + hiddenSequences.adjustHeightSequenceDeleted(i); + } } } - /** */ - public SequenceGroup findGroup(SequenceI s) + @Override + public void deleteHiddenSequence(int i) { - for (int i = 0; i < this.groups.size(); i++) + synchronized (sequences) { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - - if (sg.getSequences(null).contains(s)) + if (i > -1 && i < getHeight()) { - return sg; + sequences.remove(i); } } + } + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI) + */ + @Override + public SequenceGroup findGroup(SequenceI seq, int position) + { + synchronized (groups) + { + for (SequenceGroup sg : groups) + { + if (sg.getSequences(null).contains(seq)) + { + if (position >= sg.getStartRes() && position <= sg.getEndRes()) + { + return sg; + } + } + } + } return null; } - /** - * DOCUMENT ME! - * - * @param s - * DOCUMENT ME! + /* + * (non-Javadoc) * - * @return DOCUMENT ME! + * @see + * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI) */ + @Override public SequenceGroup[] findAllGroups(SequenceI s) { - Vector temp = new Vector(); + ArrayList temp = new ArrayList<>(); - int gSize = groups.size(); - for (int i = 0; i < gSize; i++) + synchronized (groups) { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - if (sg == null || sg.getSequences(null) == null) + int gSize = groups.size(); + for (int i = 0; i < gSize; i++) { - this.deleteGroup(sg); - gSize--; - continue; - } + SequenceGroup sg = groups.get(i); + if (sg == null || sg.getSequences() == null) + { + this.deleteGroup(sg); + gSize--; + continue; + } - if (sg.getSequences(null).contains(s)) - { - temp.addElement(sg); + if (sg.getSequences().contains(s)) + { + temp.add(sg); + } } } - SequenceGroup[] ret = new SequenceGroup[temp.size()]; - - for (int i = 0; i < temp.size(); i++) - { - ret[i] = (SequenceGroup) temp.elementAt(i); - } - - return ret; + return temp.toArray(ret); } /** */ + @Override public void addGroup(SequenceGroup sg) { - if (!groups.contains(sg)) + synchronized (groups) { - if (hiddenSequences.getSize() > 0) + if (!groups.contains(sg)) { - int i, iSize = sg.getSize(); - for (i = 0; i < iSize; i++) + if (hiddenSequences.getSize() > 0) { - if (!sequences.contains(sg.getSequenceAt(i))) + int i, iSize = sg.getSize(); + for (i = 0; i < iSize; i++) { - sg.deleteSequence(sg.getSequenceAt(i), false); - iSize--; - i--; + if (!sequences.contains(sg.getSequenceAt(i))) + { + sg.deleteSequence(sg.getSequenceAt(i), false); + iSize--; + i--; + } } - } - if (sg.getSize() < 1) - { - return; + if (sg.getSize() < 1) + { + return; + } } + sg.setContext(this, true); + groups.add(sg); } - - groups.addElement(sg); } } @@ -357,7 +477,9 @@ public class Alignment implements AlignmentI return; } // remove annotation very quickly - AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length]; + AlignmentAnnotation[] t, + todelete = new AlignmentAnnotation[annotations.length], + tokeep = new AlignmentAnnotation[annotations.length]; int i, p, k; if (gp == null) { @@ -404,26 +526,40 @@ public class Alignment implements AlignmentI } } + @Override public void deleteAllGroups() { - if (annotations != null) + synchronized (groups) { - removeAnnotationForGroup(null); + if (annotations != null) + { + removeAnnotationForGroup(null); + } + for (SequenceGroup sg : groups) + { + sg.setContext(null, false); + } + groups.clear(); } - groups.removeAllElements(); } /** */ + @Override public void deleteGroup(SequenceGroup g) { - if (groups.contains(g)) + synchronized (groups) { - removeAnnotationForGroup(g); - groups.removeElement(g); + if (groups.contains(g)) + { + removeAnnotationForGroup(g); + groups.remove(g); + g.setContext(null, false); + } } } /** */ + @Override public SequenceI findName(String name) { return findName(name, false); @@ -434,6 +570,7 @@ public class Alignment implements AlignmentI * * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean) */ + @Override public SequenceI findName(String token, boolean b) { return findName(null, token, b); @@ -445,6 +582,7 @@ public class Alignment implements AlignmentI * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String, * boolean) */ + @Override public SequenceI findName(SequenceI startAfter, String token, boolean b) { @@ -474,7 +612,7 @@ public class Alignment implements AlignmentI sqname = sq.getName(); if (sqname.equals(token) // exact match || (b && // allow imperfect matches - case varies - (sqname.equalsIgnoreCase(token)))) + (sqname.equalsIgnoreCase(token)))) { return getSequenceAt(i); } @@ -485,6 +623,7 @@ public class Alignment implements AlignmentI return null; } + @Override public SequenceI[] findSequenceMatch(String name) { Vector matches = new Vector(); @@ -514,6 +653,7 @@ public class Alignment implements AlignmentI * * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI) */ + @Override public int findIndex(SequenceI s) { int i = 0; @@ -537,7 +677,8 @@ public class Alignment implements AlignmentI * @see * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults) */ - public int findIndex(SearchResults results) + @Override + public int findIndex(SearchResultsI results) { int i = 0; @@ -552,25 +693,23 @@ public class Alignment implements AlignmentI return -1; } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ + @Override public int getHeight() { return sequences.size(); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ + @Override + public int getAbsoluteHeight() + { + return sequences.size() + getHiddenSequences().getSize(); + } + + @Override public int getWidth() { int maxLength = -1; - + for (int i = 0; i < sequences.size(); i++) { if (getSequenceAt(i).getLength() > maxLength) @@ -578,9 +717,34 @@ public class Alignment implements AlignmentI maxLength = getSequenceAt(i).getLength(); } } - + return maxLength; } + /* + @Override + public int getWidth() + { + final Wrapper temp = new Wrapper(); + + forEachSequence(new Consumer() + { + @Override + public void accept(SequenceI s) + { + if (s.getLength() > temp.inner) + { + temp.inner = s.getLength(); + } + } + }, 0, sequences.size() - 1); + + return temp.inner; + } + + public static class Wrapper + { + public int inner; + }*/ /** * DOCUMENT ME! @@ -588,15 +752,17 @@ public class Alignment implements AlignmentI * @param gc * DOCUMENT ME! */ + @Override public void setGapCharacter(char gc) { gapCharacter = gc; - - for (int i = 0; i < sequences.size(); i++) + synchronized (sequences) { - Sequence seq = (Sequence) sequences.elementAt(i); - seq.setSequence(seq.getSequenceAsString().replace('.', gc) - .replace('-', gc).replace(' ', gc)); + for (SequenceI seq : sequences) + { + seq.setSequence(seq.getSequenceAsString().replace('.', gc) + .replace('-', gc).replace(' ', gc)); + } } } @@ -605,6 +771,7 @@ public class Alignment implements AlignmentI * * @return DOCUMENT ME! */ + @Override public char getGapCharacter() { return gapCharacter; @@ -615,6 +782,7 @@ public class Alignment implements AlignmentI * * @see jalview.datamodel.AlignmentI#isAligned() */ + @Override public boolean isAligned() { return isAligned(false); @@ -625,6 +793,7 @@ public class Alignment implements AlignmentI * * @see jalview.datamodel.AlignmentI#isAligned(boolean) */ + @Override public boolean isAligned(boolean includeHidden) { int width = getWidth(); @@ -646,17 +815,47 @@ public class Alignment implements AlignmentI return true; } + @Override + public boolean isHidden(int alignmentIndex) + { + return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null); + } + + /** + * Delete all annotations, including auto-calculated if the flag is set true. + * Returns true if at least one annotation was deleted, else false. + * + * @param includingAutoCalculated + * @return + */ + @Override + public boolean deleteAllAnnotations(boolean includingAutoCalculated) + { + boolean result = false; + for (AlignmentAnnotation alan : getAlignmentAnnotation()) + { + if (!alan.autoCalculated || includingAutoCalculated) + { + deleteAnnotation(alan); + result = true; + } + } + return result; + } + /* * (non-Javadoc) * * @seejalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel. * AlignmentAnnotation) */ + @Override public boolean deleteAnnotation(AlignmentAnnotation aa) { return deleteAnnotation(aa, true); } - + + @Override public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook) { int aSize = 1; @@ -684,13 +883,16 @@ public class Alignment implements AlignmentI continue; } if (tIndex < temp.length) + { temp[tIndex++] = annotations[i]; + } } if (swap) { annotations = temp; - if (unhook) { + if (unhook) + { unhookAnnotation(aa); } } @@ -721,6 +923,7 @@ public class Alignment implements AlignmentI * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel. * AlignmentAnnotation) */ + @Override public void addAnnotation(AlignmentAnnotation aa) { addAnnotation(aa, -1); @@ -732,12 +935,14 @@ public class Alignment implements AlignmentI * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel. * AlignmentAnnotation, int) */ + @Override public void addAnnotation(AlignmentAnnotation aa, int pos) { - if(aa.getRNAStruc()!= null){ - hasRNAStructure=true; + if (aa.getRNAStruc() != null) + { + hasRNAStructure = true; } - + int aSize = 1; if (annotations != null) { @@ -773,6 +978,7 @@ public class Alignment implements AlignmentI annotations = temp; } + @Override public void setAnnotationIndex(AlignmentAnnotation aa, int index) { if (aa == null || annotations == null || annotations.length - 1 < index) @@ -814,64 +1020,162 @@ public class Alignment implements AlignmentI return annotations; } - public void setNucleotide(boolean b) + @Override + public boolean isNucleotide() { - if (b) - { - type = NUCLEOTIDE; - } - else - { - type = PROTEIN; - } + return nucleotide; } - public boolean isNucleotide() + @Override + public boolean hasRNAStructure() { - if (type == NUCLEOTIDE) - { - return true; - } - else - { - return false; - } - } - - public boolean hasRNAStructure(){ - //TODO can it happen that structure is removed from alignment? + // TODO can it happen that structure is removed from alignment? return hasRNAStructure; } - public void setDataset(Alignment data) + @Override + public void setDataset(AlignmentI data) { if (dataset == null && data == null) { - // Create a new dataset for this alignment. - // Can only be done once, if dataset is not null - // This will not be performed - SequenceI[] seqs = new SequenceI[getHeight()]; - SequenceI currentSeq; + createDatasetAlignment(); + } + else if (dataset == null && data != null) + { + if (data == this) + { + throw new IllegalArgumentException("Circular dataset reference"); + } + if (!(data instanceof Alignment)) + { + throw new Error( + "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference"); + } + dataset = (Alignment) data; for (int i = 0; i < getHeight(); i++) { - currentSeq = getSequenceAt(i); - if (currentSeq.getDatasetSequence() != null) + SequenceI currentSeq = getSequenceAt(i); + SequenceI dsq = currentSeq.getDatasetSequence(); + if (dsq == null) { - seqs[i] = (Sequence) currentSeq.getDatasetSequence(); + dsq = currentSeq.createDatasetSequence(); + dataset.addSequence(dsq); } else { - seqs[i] = currentSeq.createDatasetSequence(); + while (dsq.getDatasetSequence() != null) + { + dsq = dsq.getDatasetSequence(); + } + if (dataset.findIndex(dsq) == -1) + { + dataset.addSequence(dsq); + } } } + } + dataset.addAlignmentRef(); + } - dataset = new Alignment(seqs); + /** + * add dataset sequences to seq for currentSeq and any sequences it references + */ + private void resolveAndAddDatasetSeq(SequenceI currentSeq, + Set seqs, boolean createDatasetSequence) + { + SequenceI alignedSeq = currentSeq; + if (currentSeq.getDatasetSequence() != null) + { + currentSeq = currentSeq.getDatasetSequence(); } - else if (dataset == null && data != null) + else + { + if (createDatasetSequence) + { + currentSeq = currentSeq.createDatasetSequence(); + } + } + + List toProcess = new ArrayList<>(); + toProcess.add(currentSeq); + while (toProcess.size() > 0) { - dataset = data; + // use a queue ? + SequenceI curDs = toProcess.remove(0); + + if (!seqs.add(curDs)) + { + continue; + } + // iterate over database references, making sure we add forward referenced + // sequences + if (curDs.getDBRefs() != null) + { + for (DBRefEntry dbr : curDs.getDBRefs()) + { + if (dbr.getMap() != null && dbr.getMap().getTo() != null) + { + if (dbr.getMap().getTo() == alignedSeq) + { + /* + * update mapping to be to the newly created dataset sequence + */ + dbr.getMap().setTo(currentSeq); + } + if (dbr.getMap().getTo().getDatasetSequence() != null) + { + throw new Error("Implementation error: Map.getTo() for dbref " + + dbr + " from " + curDs.getName() + + " is not a dataset sequence."); + } + // we recurse to add all forward references to dataset sequences via + // DBRefs/etc + toProcess.add(dbr.getMap().getTo()); + } + } + } } - dataset.addAlignmentRef(); + } + + /** + * Creates a new dataset for this alignment. Can only be done once - if + * dataset is not null this will not be performed. + */ + public void createDatasetAlignment() + { + if (dataset != null) + { + return; + } + // try to avoid using SequenceI.equals at this stage, it will be expensive + Set seqs = new LinkedIdentityHashSet<>(); + + for (int i = 0; i < getHeight(); i++) + { + SequenceI currentSeq = getSequenceAt(i); + resolveAndAddDatasetSeq(currentSeq, seqs, true); + } + + // verify all mappings are in dataset + for (AlignedCodonFrame cf : codonFrameList) + { + for (SequenceToSequenceMapping ssm : cf.getMappings()) + { + if (!seqs.contains(ssm.getFromSeq())) + { + resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false); + } + if (!seqs.contains(ssm.getMapping().getTo())) + { + resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false); + } + } + } + // finally construct dataset + dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); + // move mappings to the dataset alignment + dataset.codonFrameList = this.codonFrameList; + this.codonFrameList = null; } /** @@ -887,11 +1191,13 @@ public class Alignment implements AlignmentI alignmentRefs++; } + @Override public Alignment getDataset() { return dataset; } + @Override public boolean padGaps() { boolean modified = false; @@ -943,6 +1249,7 @@ public class Alignment implements AlignmentI * true if alignment padded to right, false to justify to left * @return true if alignment was changed */ + @Override public boolean justify(boolean right) { boolean modified = false; @@ -956,8 +1263,8 @@ public class Alignment implements AlignmentI current = getSequenceAt(i); // This should really be a sequence method ends[i * 2] = current.findIndex(current.getStart()); - ends[i * 2 + 1] = current.findIndex(current.getStart() - + current.getLength()); + ends[i * 2 + 1] = current + .findIndex(current.getStart() + current.getLength()); boolean hitres = false; for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++) { @@ -1040,172 +1347,185 @@ public class Alignment implements AlignmentI return modified; } + @Override public HiddenSequences getHiddenSequences() { return hiddenSequences; } + @Override + public HiddenColumns getHiddenColumns() + { + return hiddenCols; + } + + @Override public CigarArray getCompactAlignment() { - SeqCigar alseqs[] = new SeqCigar[sequences.size()]; - for (int i = 0; i < sequences.size(); i++) + synchronized (sequences) { - alseqs[i] = new SeqCigar((SequenceI) sequences.elementAt(i)); + SeqCigar alseqs[] = new SeqCigar[sequences.size()]; + int i = 0; + for (SequenceI seq : sequences) + { + alseqs[i++] = new SeqCigar(seq); + } + CigarArray cal = new CigarArray(alseqs); + cal.addOperation(CigarArray.M, getWidth()); + return cal; } - CigarArray cal = new CigarArray(alseqs); - cal.addOperation(CigarArray.M, getWidth()); - return cal; } + @Override public void setProperty(Object key, Object value) { if (alignmentProperties == null) + { alignmentProperties = new Hashtable(); + } alignmentProperties.put(key, value); } + @Override public Object getProperty(Object key) { if (alignmentProperties != null) + { return alignmentProperties.get(key); + } else + { return null; + } } + @Override public Hashtable getProperties() { return alignmentProperties; } - AlignedCodonFrame[] codonFrameList = null; - - /* - * (non-Javadoc) - * - * @see - * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame - * ) + /** + * Adds the given mapping to the stored set. Note this may be held on the + * dataset alignment. */ + @Override public void addCodonFrame(AlignedCodonFrame codons) { - if (codons == null) - return; - if (codonFrameList == null) + List acfs = getCodonFrames(); + if (codons != null && acfs != null && !acfs.contains(codons)) { - codonFrameList = new AlignedCodonFrame[] - { codons }; - return; + acfs.add(codons); } - AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1]; - System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length); - t[codonFrameList.length] = codons; - codonFrameList = t; } /* * (non-Javadoc) * - * @see jalview.datamodel.AlignmentI#getCodonFrame(int) + * @see + * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI) */ - public AlignedCodonFrame getCodonFrame(int index) + @Override + public List getCodonFrame(SequenceI seq) { - return codonFrameList[index]; + if (seq == null) + { + return null; + } + List cframes = new ArrayList<>(); + for (AlignedCodonFrame acf : getCodonFrames()) + { + if (acf.involvesSequence(seq)) + { + cframes.add(acf); + } + } + return cframes; } - /* - * (non-Javadoc) + /** + * Sets the codon frame mappings (replacing any existing mappings). Note the + * mappings are set on the dataset alignment instead if there is one. * - * @see - * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI) + * @see jalview.datamodel.AlignmentI#setCodonFrames() */ - public AlignedCodonFrame[] getCodonFrame(SequenceI seq) + @Override + public void setCodonFrames(List acfs) { - if (seq == null || codonFrameList == null) - return null; - Vector cframes = new Vector(); - for (int f = 0; f < codonFrameList.length; f++) + if (dataset != null) { - if (codonFrameList[f].involvesSequence(seq)) - cframes.addElement(codonFrameList[f]); + dataset.setCodonFrames(acfs); + } + else + { + this.codonFrameList = acfs; } - if (cframes.size() == 0) - return null; - AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()]; - cframes.copyInto(cfr); - return cfr; } - /* - * (non-Javadoc) + /** + * Returns the set of codon frame mappings. Any changes to the returned set + * will affect the alignment. The mappings are held on (and read from) the + * dataset alignment if there is one. * * @see jalview.datamodel.AlignmentI#getCodonFrames() */ - public AlignedCodonFrame[] getCodonFrames() + @Override + public List getCodonFrames() { - return codonFrameList; + // TODO: Fix this method to fix failing AlignedCodonFrame tests + // this behaviour is currently incorrect. method should return codon frames + // for just the alignment, + // selected from dataset + return dataset != null ? dataset.getCodonFrames() : codonFrameList; } - /* - * (non-Javadoc) - * - * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel. - * AlignedCodonFrame) + /** + * Removes the given mapping from the stored set. Note that the mappings are + * held on the dataset alignment if there is one. */ + @Override public boolean removeCodonFrame(AlignedCodonFrame codons) { - if (codons == null || codonFrameList == null) - return false; - boolean removed = false; - int i = 0, iSize = codonFrameList.length; - while (i < iSize) + List acfs = getCodonFrames(); + if (codons == null || acfs == null) { - if (codonFrameList[i] == codons) - { - removed = true; - if (i + 1 < iSize) - { - System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize - - i - 1); - } - iSize--; - } - else - { - i++; - } + return false; } - return removed; + return acfs.remove(codons); } + @Override public void append(AlignmentI toappend) { - // TODO test this method for a future 2.5 release + // TODO JAL-1270 needs test coverage // currently tested for use in jalview.gui.SequenceFetcher - boolean samegap = toappend.getGapCharacter() == getGapCharacter(); char oldc = toappend.getGapCharacter(); + boolean samegap = oldc == getGapCharacter(); boolean hashidden = toappend.getHiddenSequences() != null && toappend.getHiddenSequences().hiddenSequences != null; // get all sequences including any hidden ones - Vector sqs = (hashidden) ? toappend.getHiddenSequences() - .getFullAlignment().getSequences() : toappend.getSequences(); + List sqs = (hashidden) + ? toappend.getHiddenSequences().getFullAlignment() + .getSequences() + : toappend.getSequences(); if (sqs != null) { - Enumeration sq = sqs.elements(); - while (sq.hasMoreElements()) + // avoid self append deadlock by + List toappendsq = new ArrayList<>(); + synchronized (sqs) { - SequenceI addedsq = (SequenceI) sq.nextElement(); - if (!samegap) + for (SequenceI addedsq : sqs) { - char[] oldseq = addedsq.getSequence(); - for (int c = 0; c < oldseq.length; c++) + if (!samegap) { - if (oldseq[c] == oldc) - { - oldseq[c] = gapCharacter; - } + addedsq.replace(oldc, gapCharacter); } + toappendsq.add(addedsq); } + } + for (SequenceI addedsq : toappendsq) + { addSequence(addedsq); } } @@ -1214,18 +1534,16 @@ public class Alignment implements AlignmentI { addAnnotation(alan[a]); } - AlignedCodonFrame[] acod = toappend.getCodonFrames(); - for (int a = 0; acod != null && a < acod.length; a++) - { - this.addCodonFrame(acod[a]); - } - Vector sg = toappend.getGroups(); + + // use add method + getCodonFrames().addAll(toappend.getCodonFrames()); + + List sg = toappend.getGroups(); if (sg != null) { - Enumeration el = sg.elements(); - while (el.hasMoreElements()) + for (SequenceGroup _sg : sg) { - addGroup((SequenceGroup) el.nextElement()); + addGroup(_sg); } } if (toappend.getHiddenSequences() != null) @@ -1265,8 +1583,8 @@ public class Alignment implements AlignmentI if (ourval instanceof String) { // append strings - this.setProperty(k, ((String) ourval) + "; " - + ((String) toapprop)); + this.setProperty(k, + ((String) ourval) + "; " + ((String) toapprop)); } else { @@ -1292,4 +1610,433 @@ public class Alignment implements AlignmentI } } + @Override + public AlignmentAnnotation findOrCreateAnnotation(String name, + String calcId, boolean autoCalc, SequenceI seqRef, + SequenceGroup groupRef) + { + if (annotations != null) + { + for (AlignmentAnnotation annot : getAlignmentAnnotation()) + { + if (annot.autoCalculated == autoCalc && (name.equals(annot.label)) + && (calcId == null || annot.getCalcId().equals(calcId)) + && annot.sequenceRef == seqRef + && annot.groupRef == groupRef) + { + return annot; + } + } + } + AlignmentAnnotation annot = new AlignmentAnnotation(name, name, + new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH); + annot.hasText = false; + if (calcId != null) + { + annot.setCalcId(new String(calcId)); + } + annot.autoCalculated = autoCalc; + if (seqRef != null) + { + annot.setSequenceRef(seqRef); + } + annot.groupRef = groupRef; + addAnnotation(annot); + + return annot; + } + + @Override + public Iterable findAnnotation(String calcId) + { + AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation(); + if (alignmentAnnotation != null) + { + return AlignmentAnnotation.findAnnotation( + Arrays.asList(getAlignmentAnnotation()), calcId); + } + return Arrays.asList(new AlignmentAnnotation[] {}); + } + + @Override + public Iterable findAnnotations(SequenceI seq, + String calcId, String label) + { + return AlignmentAnnotation.findAnnotations( + Arrays.asList(getAlignmentAnnotation()), seq, calcId, label); + } + + @Override + public void moveSelectedSequencesByOne(SequenceGroup sg, + Map map, boolean up) + { + synchronized (sequences) + { + if (up) + { + + for (int i = 1, iSize = sequences.size(); i < iSize; i++) + { + SequenceI seq = sequences.get(i); + if (!sg.getSequences(map).contains(seq)) + { + continue; + } + + SequenceI temp = sequences.get(i - 1); + if (sg.getSequences(null).contains(temp)) + { + continue; + } + + sequences.set(i, temp); + sequences.set(i - 1, seq); + } + } + else + { + for (int i = sequences.size() - 2; i > -1; i--) + { + SequenceI seq = sequences.get(i); + if (!sg.getSequences(map).contains(seq)) + { + continue; + } + + SequenceI temp = sequences.get(i + 1); + if (sg.getSequences(map).contains(temp)) + { + continue; + } + + sequences.set(i, temp); + sequences.set(i + 1, seq); + } + } + + } + } + + @Override + public void validateAnnotation(AlignmentAnnotation alignmentAnnotation) + { + alignmentAnnotation.validateRangeAndDisplay(); + if (isNucleotide() && alignmentAnnotation.isValidStruc()) + { + hasRNAStructure = true; + } + } + + private SequenceI seqrep = null; + + /** + * + * @return the representative sequence for this group + */ + @Override + public SequenceI getSeqrep() + { + return seqrep; + } + + /** + * set the representative sequence for this group. Note - this affects the + * interpretation of the Hidereps attribute. + * + * @param seqrep + * the seqrep to set (null means no sequence representative) + */ + @Override + public void setSeqrep(SequenceI seqrep) + { + this.seqrep = seqrep; + } + + /** + * + * @return true if group has a sequence representative + */ + @Override + public boolean hasSeqrep() + { + return seqrep != null; + } + + @Override + public int getEndRes() + { + return getWidth() - 1; + } + + @Override + public int getStartRes() + { + return 0; + } + + /* + * In the case of AlignmentI - returns the dataset for the alignment, if set + * (non-Javadoc) + * + * @see jalview.datamodel.AnnotatedCollectionI#getContext() + */ + @Override + public AnnotatedCollectionI getContext() + { + return dataset; + } + + /** + * Align this alignment like the given (mapped) one. + */ + @Override + public int alignAs(AlignmentI al) + { + /* + * Currently retains unmapped gaps (in introns), regaps mapped regions + * (exons) + */ + return alignAs(al, false, true); + } + + /** + * Align this alignment 'the same as' the given one. Mapped sequences only are + * realigned. If both of the same type (nucleotide/protein) then align both + * identically. If this is nucleotide and the other is protein, make 3 gaps + * for each gap in the protein sequences. If this is protein and the other is + * nucleotide, insert a gap for each 3 gaps (or part thereof) between + * nucleotide bases. If this is protein and the other is nucleotide, gaps + * protein to match the relative ordering of codons in the nucleotide. + * + * Parameters control whether gaps in exon (mapped) and intron (unmapped) + * regions are preserved. Gaps that connect introns to exons are treated + * conservatively, i.e. only preserved if both intron and exon gaps are + * preserved. TODO: check caveats below where the implementation fails + * + * @param al + * - must have same dataset, and sequences in al must have equivalent + * dataset sequence and start/end bounds under given mapping + * @param preserveMappedGaps + * if true, gaps within and between mapped codons are preserved + * @param preserveUnmappedGaps + * if true, gaps within and between unmapped codons are preserved + */ + // @Override + public int alignAs(AlignmentI al, boolean preserveMappedGaps, + boolean preserveUnmappedGaps) + { + // TODO should this method signature be the one in the interface? + // JBPComment - yes - neither flag is used, so should be deleted. + boolean thisIsNucleotide = this.isNucleotide(); + boolean thatIsProtein = !al.isNucleotide(); + if (!thatIsProtein && !thisIsNucleotide) + { + return AlignmentUtils.alignProteinAsDna(this, al); + } + else if (thatIsProtein && thisIsNucleotide) + { + return AlignmentUtils.alignCdsAsProtein(this, al); + } + return AlignmentUtils.alignAs(this, al); + } + + /** + * Returns the alignment in Fasta format. Behaviour of this method is not + * guaranteed between versions. + */ + @Override + public String toString() + { + return new FastaFile().print(getSequencesArray(), true); + } + + /** + * Returns the set of distinct sequence names. No ordering is guaranteed. + */ + @Override + public Set getSequenceNames() + { + Set names = new HashSet<>(); + for (SequenceI seq : getSequences()) + { + names.add(seq.getName()); + } + return names; + } + + @Override + public boolean hasValidSequence() + { + boolean hasValidSeq = false; + for (SequenceI seq : getSequences()) + { + if ((seq.getEnd() - seq.getStart()) > 0) + { + hasValidSeq = true; + break; + } + } + return hasValidSeq; + } + + /** + * Update any mappings to 'virtual' sequences to compatible real ones, if + * present in the added sequences. Returns a count of mappings updated. + * + * @param seqs + * @return + */ + @Override + public int realiseMappings(List seqs) + { + int count = 0; + for (SequenceI seq : seqs) + { + for (AlignedCodonFrame mapping : getCodonFrames()) + { + count += mapping.realiseWith(seq); + } + } + return count; + } + + /** + * Returns the first AlignedCodonFrame that has a mapping between the given + * dataset sequences + * + * @param mapFrom + * @param mapTo + * @return + */ + @Override + public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo) + { + for (AlignedCodonFrame acf : getCodonFrames()) + { + if (acf.getAaForDnaSeq(mapFrom) == mapTo) + { + return acf; + } + } + return null; + } + + @Override + public void setHiddenColumns(HiddenColumns cols) + { + hiddenCols = cols; + } + + @Override + public void setupJPredAlignment() + { + SequenceI repseq = getSequenceAt(0); + setSeqrep(repseq); + HiddenColumns cs = new HiddenColumns(); + cs.hideList(repseq.getInsertions()); + setHiddenColumns(cs); + } + + @Override + public HiddenColumns propagateInsertions(SequenceI profileseq, + AlignmentView input) + { + int profsqpos = 0; + + char gc = getGapCharacter(); + Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc); + HiddenColumns nview = (HiddenColumns) alandhidden[1]; + SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos]; + return propagateInsertions(profileseq, origseq, nview); + } + + /** + * + * @param profileseq + * sequence in al which corresponds to origseq + * @param al + * alignment which is to have gaps inserted into it + * @param origseq + * sequence corresponding to profileseq which defines gap map for + * modifying al + */ + private HiddenColumns propagateInsertions(SequenceI profileseq, + SequenceI origseq, HiddenColumns hc) + { + // take the set of hidden columns, and the set of gaps in origseq, + // and remove all the hidden gaps from hiddenColumns + + // first get the gaps as a Bitset + // then calculate hidden ^ not(gap) + BitSet gaps = origseq.gapBitset(); + hc.andNot(gaps); + + // for each sequence in the alignment, except the profile sequence, + // insert gaps corresponding to each hidden region but where each hidden + // column region is shifted backwards by the number of preceding visible + // gaps update hidden columns at the same time + HiddenColumns newhidden = new HiddenColumns(); + + int numGapsBefore = 0; + int gapPosition = 0; + Iterator it = hc.iterator(); + while (it.hasNext()) + { + int[] region = it.next(); + + // get region coordinates accounting for gaps + // we can rely on gaps not being *in* hidden regions because we already + // removed those + while (gapPosition < region[0]) + { + gapPosition++; + if (gaps.get(gapPosition)) + { + numGapsBefore++; + } + } + + int left = region[0] - numGapsBefore; + int right = region[1] - numGapsBefore; + + newhidden.hideColumns(left, right); + padGaps(left, right, profileseq); + } + return newhidden; + } + + /** + * Pad gaps in all sequences in alignment except profileseq + * + * @param left + * position of first gap to insert + * @param right + * position of last gap to insert + * @param profileseq + * sequence not to pad + */ + private void padGaps(int left, int right, SequenceI profileseq) + { + char gc = getGapCharacter(); + + // make a string with number of gaps = length of hidden region + StringBuilder sb = new StringBuilder(); + for (int g = 0; g < right - left + 1; g++) + { + sb.append(gc); + } + + // loop over the sequences and pad with gaps where required + for (int s = 0, ns = getHeight(); s < ns; s++) + { + SequenceI sqobj = getSequenceAt(s); + if ((sqobj != profileseq) && (sqobj.getLength() >= left)) + { + String sq = sqobj.getSequenceAsString(); + sqobj.setSequence( + sq.substring(0, left) + sb.toString() + sq.substring(left)); + } + } + } + }