X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=b2a647ae5739ca379ea92f0e8bf8d457f6903c77;hb=90add82a33c05e218a92eb1cdb2c345da05465f8;hp=4485c4641fa93d6baf2f4ee8c628edc45935c20a;hpb=1772466d28ffeef4aa4e1a4d5a640fd6a987df8c;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 4485c46..290707d 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,686 +1,1920 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel; -import jalview.analysis.*; - -import jalview.util.*; - -import java.util.*; - -/** Data structure to hold and manipulate a multiple sequence alignment +import jalview.analysis.AlignmentUtils; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; +import jalview.io.FastaFile; +import jalview.util.Comparison; +import jalview.util.LinkedIdentityHashSet; +import jalview.util.MessageManager; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Vector; + +/** + * Data structure to hold and manipulate a multiple sequence alignment + */ +/** + * @author JimP + * */ public class Alignment implements AlignmentI { - protected Alignment dataset; - protected Vector sequences; - protected Vector groups = new Vector(); - protected char gapCharacter = '-'; - protected int type = NUCLEOTIDE; - public static final int PROTEIN = 0; - public static final int NUCLEOTIDE = 1; + private Alignment dataset; - /** DOCUMENT ME!! */ - public AlignmentAnnotation[] annotations; + protected List sequences; - HiddenSequences hiddenSequences = new HiddenSequences(this); + protected List groups; - private void initAlignment(SequenceI[] seqs) { - int i=0; + protected char gapCharacter = '-'; - if( jalview.util.Comparison.isNucleotide(seqs)) - type = NUCLEOTIDE; - else - type = PROTEIN; + private boolean nucleotide = true; - sequences = new Vector(); + public boolean hasRNAStructure = false; - for (i = 0; i < seqs.length; i++) - { - sequences.addElement(seqs[i]); - } + public AlignmentAnnotation[] annotations; + + HiddenSequences hiddenSequences; + public Hashtable alignmentProperties; + + private List codonFrameList; + + private void initAlignment(SequenceI[] seqs) + { + groups = Collections.synchronizedList(new ArrayList()); + hiddenSequences = new HiddenSequences(this); + codonFrameList = new ArrayList(); + + nucleotide = Comparison.isNucleotide(seqs); + + sequences = Collections.synchronizedList(new ArrayList()); + + for (int i = 0; i < seqs.length; i++) + { + sequences.add(seqs[i]); } - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) + + } + + /** + * Make a 'copy' alignment - sequences have new copies of features and + * annotations, but share the original dataset sequences. + */ + public Alignment(AlignmentI al) + { + SequenceI[] seqs = al.getSequencesArray(); + for (int i = 0; i < seqs.length; i++) { - initAlignment(seqs); + seqs[i] = new Sequence(seqs[i]); } - /** - * Make a new alignment from an array of SeqCigars - * @param seqs SeqCigar[] - */ - public Alignment(SeqCigar[] alseqs) { - SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null); - initAlignment(seqs); - } - /** - * Make a new alignment from an CigarArray - * JBPNote - can only do this when compactAlignment does not contain hidden regions. - * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. - * @param compactAlignment CigarArray + + initAlignment(seqs); + + /* + * Share the same dataset sequence mappings (if any). */ - public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error("Alignment(CigarArray) not yet implemented"); - // this(compactAlignment.refCigars); + if (dataset == null && al.getDataset() == null) + { + this.setCodonFrames(al.getCodonFrames()); } + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getSequences() + /** + * Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + initAlignment(seqs); + } + + /** + * Make a new alignment from an array of SeqCigars + * + * @param seqs + * SeqCigar[] + */ + public Alignment(SeqCigar[] alseqs) + { + SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, + gapCharacter, new ColumnSelection(), null); + initAlignment(seqs); + } + + /** + * Make a new alignment from an CigarArray JBPNote - can only do this when + * compactAlignment does not contain hidden regions. JBPNote - must also check + * that compactAlignment resolves to a set of SeqCigars - or construct them + * appropriately. + * + * @param compactAlignment + * CigarArray + */ + public static AlignmentI createAlignment(CigarArray compactAlignment) + { + throw new Error( + MessageManager + .getString("error.alignment_cigararray_not_implemented")); + // this(compactAlignment.refCigars); + } + + @Override + public List getSequences() + { + return sequences; + } + + @Override + public List getSequences( + Map hiddenReps) + { + // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to + // work on this. + return sequences; + } + + @Override + public SequenceI[] getSequencesArray() + { + if (sequences == null) + { + return null; + } + synchronized (sequences) { - return sequences; + return sequences.toArray(new SequenceI[sequences.size()]); } + } + + /** + * Returns a map of lists of sequences keyed by sequence name. + * + * @return + */ + @Override + public Map> getSequencesByName() + { + return AlignmentUtils.getSequencesByName(this); + } - public SequenceI [] getSequencesArray() + /** + * DOCUMENT ME! + * + * @param i + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + @Override + public SequenceI getSequenceAt(int i) + { + synchronized (sequences) { - SequenceI [] reply = new SequenceI[sequences.size()]; - for(int i=0; i -1 && i < sequences.size()) { - reply[i] = (SequenceI)sequences.elementAt(i); + return sequences.get(i); } - return reply; } + return null; + } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceI getSequenceAt(int i) + /** + * Adds a sequence to the alignment. Recalculates maxLength and size. Note + * this currently does not recalculate whether or not the alignment is + * nucleotide, so mixed alignments may have undefined behaviour. + * + * @param snew + */ + @Override + public void addSequence(SequenceI snew) + { + if (dataset != null) { - if (i < sequences.size()) - { - return (SequenceI) sequences.elementAt(i); - } - - return null; - } - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void addSequence(SequenceI snew) - { - if(dataset!=null) + // maintain dataset integrity + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { - if(snew.getDatasetSequence()!=null) - { - System.out.println(snew.getName()); - getDataset().addSequence(snew.getDatasetSequence()); - } - else - { - Sequence ds = new Sequence(snew.getName(), - AlignSeq.extractGaps("-. ", - snew.getSequence()), - snew.getStart(), - snew.getEnd()); - - snew.setDatasetSequence(ds); - getDataset().addSequence(ds); - } + // derive new sequence + SequenceI adding = snew.deriveSequence(); + snew = adding; + dsseq = snew.getDatasetSequence(); + } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); } - sequences.addElement(snew); - hiddenSequences.adjustHeightSequenceAdded(); } - - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void setSequenceAt(int i, SequenceI snew) + if (sequences == null) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(oldseq); - - sequences.setElementAt(snew, i); + initAlignment(new SequenceI[] { snew }); } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getGroups() + else { - return groups; + synchronized (sequences) + { + sequences.add(snew); + } } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - */ - public void deleteSequence(SequenceI s) + if (hiddenSequences != null) { - deleteSequence(findIndex(s)); + hiddenSequences.adjustHeightSequenceAdded(); } + } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - */ - public void deleteSequence(int i) + @Override + public SequenceI replaceSequenceAt(int i, SequenceI snew) + { + synchronized (sequences) { - if(i>0 && i i) { - sequences.removeElementAt(i); - hiddenSequences.adjustHeightSequenceDeleted(i); + return sequences.set(i, snew); + + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); } + return null; } + } + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + @Override + public List getGroups() + { + return groups; + } - /** */ - public SequenceGroup findGroup(SequenceI s) + @Override + public void finalize() throws Throwable + { + if (getDataset() != null) { - for (int i = 0; i < this.groups.size(); i++) - { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - - if (sg.getSequences(false).contains(s)) - { - return sg; - } - } - - return null; + getDataset().removeAlignmentRef(); } - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceGroup[] findAllGroups(SequenceI s) - { - Vector temp = new Vector(); - - int gSize = groups.size(); - for (int i = 0; i < gSize; i++) - { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - if(sg==null || sg.getSequences(false)==null) - { - this.deleteGroup(sg); - gSize--; - continue; - } - - if (sg.getSequences(false).contains(s)) - { - temp.addElement(sg); - } - } - - SequenceGroup[] ret = new SequenceGroup[temp.size()]; + nullReferences(); + super.finalize(); + } - for (int i = 0; i < temp.size(); i++) - { - ret[i] = (SequenceGroup) temp.elementAt(i); - } + /** + * Defensively nulls out references in case this object is not garbage + * collected + */ + void nullReferences() + { + dataset = null; + sequences = null; + groups = null; + annotations = null; + hiddenSequences = null; + } - return ret; + /** + * decrement the alignmentRefs counter by one and null references if it goes + * to zero. + * + * @throws Throwable + */ + private void removeAlignmentRef() throws Throwable + { + if (--alignmentRefs == 0) + { + nullReferences(); } + } + /** + * DOCUMENT ME! + * + * @param s + * DOCUMENT ME! + */ + @Override + public void deleteSequence(SequenceI s) + { + deleteSequence(findIndex(s)); + } - - /** */ - public void addGroup(SequenceGroup sg) + /** + * DOCUMENT ME! + * + * @param i + * DOCUMENT ME! + */ + @Override + public void deleteSequence(int i) + { + if (i > -1 && i < getHeight()) { - if (!groups.contains(sg)) - { - groups.addElement(sg); - } + synchronized (sequences) + { + sequences.remove(i); + hiddenSequences.adjustHeightSequenceDeleted(i); + } } + } - /** - * DOCUMENT ME! - */ - public void deleteAllGroups() + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI) + */ + @Override + public SequenceGroup findGroup(SequenceI s) + { + synchronized (groups) { - groups.removeAllElements(); - - int i = 0; + for (int i = 0; i < this.groups.size(); i++) + { + SequenceGroup sg = groups.get(i); - while (i < sequences.size()) + if (sg.getSequences(null).contains(s)) { - SequenceI s = getSequenceAt(i); - s.setColor(java.awt.Color.white); - i++; + return sg; } + } } + return null; + } + + /* + * (non-Javadoc) + * + * @see + * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI) + */ + @Override + public SequenceGroup[] findAllGroups(SequenceI s) + { + ArrayList temp = new ArrayList(); - /** */ - public void deleteGroup(SequenceGroup g) + synchronized (groups) { - if (groups.contains(g)) + int gSize = groups.size(); + for (int i = 0; i < gSize; i++) + { + SequenceGroup sg = groups.get(i); + if (sg == null || sg.getSequences() == null) { - groups.removeElement(g); + this.deleteGroup(sg); + gSize--; + continue; } - } - /** */ - public SequenceI findName(String name) - { - int i = 0; - - while (i < sequences.size()) + if (sg.getSequences().contains(s)) { - if (getSequenceAt(i).getName().equals(name)) - { - return getSequenceAt(i); - } - - i++; + temp.add(sg); } - - return null; - } - - public SequenceI [] findSequenceMatch(String name) - { - Vector matches = new Vector(); - int i = 0; - - while (i < sequences.size()) - { - if (getSequenceAt(i).getName().equals(name)) - { - matches.addElement(getSequenceAt(i)); - } - i++; } - - SequenceI [] result = new SequenceI[matches.size()]; - for(i=0; i 0) { - if (s == getSequenceAt(i)) + int i, iSize = sg.getSize(); + for (i = 0; i < iSize; i++) + { + if (!sequences.contains(sg.getSequenceAt(i))) { - return i; + sg.deleteSequence(sg.getSequenceAt(i), false); + iSize--; + i--; } + } - i++; + if (sg.getSize() < 1) + { + return; + } } - - return -1; + sg.setContext(this); + groups.add(sg); + } } + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public int getHeight() + /** + * remove any annotation that references gp + * + * @param gp + * (if null, removes all group associated annotation) + */ + private void removeAnnotationForGroup(SequenceGroup gp) + { + if (annotations == null || annotations.length == 0) { - return sequences.size(); + return; } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public int getWidth() + // remove annotation very quickly + AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length]; + int i, p, k; + if (gp == null) { - int maxLength = -1; - - for (int i = 0; i < sequences.size(); i++) + for (i = 0, p = 0, k = 0; i < annotations.length; i++) + { + if (annotations[i].groupRef != null) { - if (getSequenceAt(i).getLength() > maxLength) - { - maxLength = getSequenceAt(i).getLength(); - } + todelete[p++] = annotations[i]; } - - return maxLength; + else + { + tokeep[k++] = annotations[i]; + } + } } - - - /** - * DOCUMENT ME! - * - * @param gc DOCUMENT ME! - */ - public void setGapCharacter(char gc) + else { - gapCharacter = gc; - - for (int i = 0; i < sequences.size(); i++) + for (i = 0, p = 0, k = 0; i < annotations.length; i++) + { + if (annotations[i].groupRef == gp) { - Sequence seq = (Sequence) sequences.elementAt(i); - seq.setSequence( seq.getSequence().replace('.', gc) ); - seq.setSequence( seq.getSequence().replace('-', gc) ); - seq.setSequence( seq.getSequence().replace(' ', gc) ); + todelete[p++] = annotations[i]; } + else + { + tokeep[k++] = annotations[i]; + } + } } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public char getGapCharacter() + if (p > 0) { - return gapCharacter; + // clear out the group associated annotation. + for (i = 0; i < p; i++) + { + unhookAnnotation(todelete[i]); + todelete[i] = null; + } + t = new AlignmentAnnotation[k]; + for (i = 0; i < k; i++) + { + t[i] = tokeep[i]; + } + annotations = t; } + } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public boolean isAligned() + @Override + public void deleteAllGroups() + { + synchronized (groups) { - int width = getWidth(); - - for (int i = 0; i < sequences.size(); i++) - { - if (getSequenceAt(i).getLength() != width) - { - return false; - } - } - - return true; + if (annotations != null) + { + removeAnnotationForGroup(null); + } + for (SequenceGroup sg : groups) + { + sg.setContext(null); + } + groups.clear(); } + } - /** - * DOCUMENT ME! - * - * @param aa DOCUMENT ME! - */ - public void deleteAnnotation(AlignmentAnnotation aa) + /** */ + @Override + public void deleteGroup(SequenceGroup g) + { + synchronized (groups) { - int aSize = 1; + if (groups.contains(g)) + { + removeAnnotationForGroup(g); + groups.remove(g); + g.setContext(null); + } + } + } - if (annotations != null) - { - aSize = annotations.length; - } + /** */ + @Override + public SequenceI findName(String name) + { + return findName(name, false); + } - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean) + */ + @Override + public SequenceI findName(String token, boolean b) + { + return findName(null, token, b); + } - int tIndex = 0; + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String, + * boolean) + */ + @Override + public SequenceI findName(SequenceI startAfter, String token, boolean b) + { - for (int i = 0; i < aSize; i++) + int i = 0; + SequenceI sq = null; + String sqname = null; + if (startAfter != null) + { + // try to find the sequence in the alignment + boolean matched = false; + while (i < sequences.size()) + { + if (getSequenceAt(i++) == startAfter) { - if (annotations[i] == aa) - { - continue; - } - - temp[tIndex] = annotations[i]; - tIndex++; + matched = true; + break; } + } + if (!matched) + { + i = 0; + } + } + while (i < sequences.size()) + { + sq = getSequenceAt(i); + sqname = sq.getName(); + if (sqname.equals(token) // exact match + || (b && // allow imperfect matches - case varies + (sqname.equalsIgnoreCase(token)))) + { + return getSequenceAt(i); + } - annotations = temp; + i++; } + return null; + } + + @Override + public SequenceI[] findSequenceMatch(String name) + { + Vector matches = new Vector(); + int i = 0; - public void adjustSequenceAnnotations() + while (i < sequences.size()) { - if(annotations!=null) + if (getSequenceAt(i).getName().equals(name)) { - for (int a = 0; a < annotations.length; a++) - { - if (annotations[a].sequenceRef != null) - { - annotations[a].adjustForAlignment(); - } - } + matches.addElement(getSequenceAt(i)); } + i++; } - /** - * DOCUMENT ME! - * - * @param aa DOCUMENT ME! - */ - public void addAnnotation(AlignmentAnnotation aa) + SequenceI[] result = new SequenceI[matches.size()]; + for (i = 0; i < result.length; i++) { - int aSize = 1; - if (annotations != null) - { - aSize = annotations.length + 1; - } + result[i] = (SequenceI) matches.elementAt(i); + } - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; + return result; - temp[aSize-1] = aa; + } - int i = 0; + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI) + */ + @Override + public int findIndex(SequenceI s) + { + int i = 0; - if (aSize > 1) - { - for (i = 0; i < (aSize-1); i++) - { - temp[i] = annotations[i]; - } - } + while (i < sequences.size()) + { + if (s == getSequenceAt(i)) + { + return i; + } - annotations = temp; + i++; } - public void setAnnotationIndex(AlignmentAnnotation aa, int index) - { - if(aa==null || annotations==null || annotations.length-1 maxLength) { - if(i==index) - continue; + maxLength = getSequenceAt(i).getLength(); + } + } - if(i