X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=64b79b7416d1381b1cb921bcef718354f36f41c7;hb=482e093c4670a813c9eec254990178b209e32dbc;hp=713cb0d04c918103753e01b5ae73f43f83d0d16a;hpb=52f42d57f0f08942ce54e3a739f1c9936cc5f293;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 713cb0d..64b79b7 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,689 +1,1129 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) + * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.datamodel; -import jalview.analysis.*; - -import jalview.util.*; - import java.util.*; -/** Data structure to hold and manipulate a multiple sequence alignment +import jalview.analysis.*; + +/** + * Data structure to hold and manipulate a multiple sequence alignment */ public class Alignment implements AlignmentI { - protected Alignment dataset; - protected Vector sequences; - protected Vector groups = new Vector(); - protected char gapCharacter = '-'; - protected int type = NUCLEOTIDE; - public static final int PROTEIN = 0; - public static final int NUCLEOTIDE = 1; + protected Alignment dataset; - /** DOCUMENT ME!! */ - public AlignmentAnnotation[] annotations; + protected Vector sequences; - HiddenSequences hiddenSequences = new HiddenSequences(this); + protected Vector groups = new Vector(); - private void initAlignment(SequenceI[] seqs) { - int i=0; + protected char gapCharacter = '-'; - if( jalview.util.Comparison.isNucleotide(seqs)) - type = NUCLEOTIDE; - else - type = PROTEIN; + protected int type = NUCLEOTIDE; - sequences = new Vector(); + public static final int PROTEIN = 0; - for (i = 0; i < seqs.length; i++) - { - sequences.addElement(seqs[i]); - } + public static final int NUCLEOTIDE = 1; - } - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) + /** DOCUMENT ME!! */ + public AlignmentAnnotation[] annotations; + + HiddenSequences hiddenSequences = new HiddenSequences(this); + + public Hashtable alignmentProperties; + + private void initAlignment(SequenceI[] seqs) + { + int i = 0; + + if (jalview.util.Comparison.isNucleotide(seqs)) { - initAlignment(seqs); - } - /** - * Make a new alignment from an array of SeqCigars - * @param seqs SeqCigar[] - */ - public Alignment(SeqCigar[] alseqs) { - SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null); - initAlignment(seqs); + type = NUCLEOTIDE; } - /** - * Make a new alignment from an CigarArray - * JBPNote - can only do this when compactAlignment does not contain hidden regions. - * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. - * @param compactAlignment CigarArray - */ - public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error("Alignment(CigarArray) not yet implemented"); - // this(compactAlignment.refCigars); + else + { + type = PROTEIN; } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getSequences() + sequences = new Vector(); + + for (i = 0; i < seqs.length; i++) { - return sequences; + sequences.addElement(seqs[i]); } - public SequenceI [] getSequencesArray() + } + + /** + * Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + initAlignment(seqs); + } + + /** + * Make a new alignment from an array of SeqCigars + * + * @param seqs + * SeqCigar[] + */ + public Alignment(SeqCigar[] alseqs) + { + SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, + gapCharacter, new ColumnSelection(), null); + initAlignment(seqs); + } + + /** + * Make a new alignment from an CigarArray JBPNote - can only do this when + * compactAlignment does not contain hidden regions. JBPNote - must also check + * that compactAlignment resolves to a set of SeqCigars - or construct them + * appropriately. + * + * @param compactAlignment + * CigarArray + */ + public static AlignmentI createAlignment(CigarArray compactAlignment) + { + throw new Error("Alignment(CigarArray) not yet implemented"); + // this(compactAlignment.refCigars); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getSequences() + { + return sequences; + } + + public SequenceI[] getSequencesArray() + { + if (sequences == null) + return null; + SequenceI[] reply = new SequenceI[sequences.size()]; + for (int i = 0; i < sequences.size(); i++) { - SequenceI [] reply = new SequenceI[sequences.size()]; - for(int i=0; i -1 && i < getHeight()) { - return groups; + sequences.removeElementAt(i); + hiddenSequences.adjustHeightSequenceDeleted(i); } + } - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - */ - public void deleteSequence(SequenceI s) + /** */ + public SequenceGroup findGroup(SequenceI s) + { + for (int i = 0; i < this.groups.size(); i++) { - deleteSequence(findIndex(s)); + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + + if (sg.getSequences(null).contains(s)) + { + return sg; + } } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - */ - public void deleteSequence(int i) + return null; + } + + /** + * DOCUMENT ME! + * + * @param s + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceGroup[] findAllGroups(SequenceI s) + { + Vector temp = new Vector(); + + int gSize = groups.size(); + for (int i = 0; i < gSize; i++) { - if(i>-1 && i 0) + { + int i, iSize = sg.getSize(); + for (i = 0; i < iSize; i++) { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + if (!sequences.contains(sg.getSequenceAt(i))) + { + sg.deleteSequence(sg.getSequenceAt(i), false); + iSize--; + i--; + } + } - if (sg.getSequences(false).contains(s)) - { - return sg; - } + if (sg.getSize() < 1) + { + return; } + } - return null; + groups.addElement(sg); } + } + + /** + * DOCUMENT ME! + */ + public void deleteAllGroups() + { + groups.removeAllElements(); + } - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceGroup[] findAllGroups(SequenceI s) + /** */ + public void deleteGroup(SequenceGroup g) + { + if (groups.contains(g)) { - Vector temp = new Vector(); + groups.removeElement(g); + } + } - int gSize = groups.size(); - for (int i = 0; i < gSize; i++) - { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - if(sg==null || sg.getSequences(false)==null) - { - this.deleteGroup(sg); - gSize--; - continue; - } + /** */ + public SequenceI findName(String name) + { + return findName(name, false); + } - if (sg.getSequences(false).contains(s)) - { - temp.addElement(sg); - } - } + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean) + */ + public SequenceI findName(String token, boolean b) + { + return findName(null, token, b); + } - SequenceGroup[] ret = new SequenceGroup[temp.size()]; + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String, + * boolean) + */ + public SequenceI findName(SequenceI startAfter, String token, boolean b) + { - for (int i = 0; i < temp.size(); i++) + int i = 0; + SequenceI sq = null; + String sqname = null; + if (startAfter != null) + { + // try to find the sequence in the alignment + boolean matched = false; + while (i < sequences.size()) + { + if (getSequenceAt(i++) == startAfter) { - ret[i] = (SequenceGroup) temp.elementAt(i); + matched = true; + break; } + } + if (!matched) + { + i = 0; + } + } + while (i < sequences.size()) + { + sq = getSequenceAt(i); + sqname = sq.getName(); + if (sqname.equals(token) // exact match + || (b && // allow imperfect matches - case varies + (sqname.equalsIgnoreCase(token)))) + { + return getSequenceAt(i); + } - return ret; + i++; } + return null; + } + public SequenceI[] findSequenceMatch(String name) + { + Vector matches = new Vector(); + int i = 0; - /** */ - public void addGroup(SequenceGroup sg) + while (i < sequences.size()) { - if (!groups.contains(sg)) - { - groups.addElement(sg); - } + if (getSequenceAt(i).getName().equals(name)) + { + matches.addElement(getSequenceAt(i)); + } + i++; } - /** - * DOCUMENT ME! - */ - public void deleteAllGroups() + SequenceI[] result = new SequenceI[matches.size()]; + for (i = 0; i < result.length; i++) { - groups.removeAllElements(); + result[i] = (SequenceI) matches.elementAt(i); + } - int i = 0; + return result; - while (i < sequences.size()) - { - SequenceI s = getSequenceAt(i); - s.setColor(java.awt.Color.white); - i++; - } - } + } + + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI) + */ + public int findIndex(SequenceI s) + { + int i = 0; - /** */ - public void deleteGroup(SequenceGroup g) + while (i < sequences.size()) { - if (groups.contains(g)) - { - groups.removeElement(g); - } + if (s == getSequenceAt(i)) + { + return i; + } + + i++; } - /** */ - public SequenceI findName(String name) + return -1; + } + + /* + * (non-Javadoc) + * + * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults) + */ + public int findIndex(SearchResults results) + { + int i = 0; + + while (i < sequences.size()) { - int i = 0; + if (results.involvesSequence(getSequenceAt(i))) + { + return i; + } + i++; + } + return -1; + } - while (i < sequences.size()) - { - if (getSequenceAt(i).getName().equals(name)) - { - return getSequenceAt(i); - } + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getHeight() + { + return sequences.size(); + } - i++; - } + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getWidth() + { + int maxLength = -1; - return null; + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() > maxLength) + { + maxLength = getSequenceAt(i).getLength(); + } } - public SequenceI [] findSequenceMatch(String name) + return maxLength; + } + + /** + * DOCUMENT ME! + * + * @param gc + * DOCUMENT ME! + */ + public void setGapCharacter(char gc) + { + gapCharacter = gc; + + for (int i = 0; i < sequences.size(); i++) { - Vector matches = new Vector(); - int i = 0; + Sequence seq = (Sequence) sequences.elementAt(i); + seq.setSequence(seq.getSequenceAsString().replace('.', gc).replace( + '-', gc).replace(' ', gc)); + } + } - while (i < sequences.size()) + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public char getGapCharacter() + { + return gapCharacter; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean isAligned() + { + int width = getWidth(); + + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() != width) { - if (getSequenceAt(i).getName().equals(name)) - { - matches.addElement(getSequenceAt(i)); - } - i++; + return false; } + } - SequenceI [] result = new SequenceI[matches.size()]; - for(i=0; i 1) { - int maxLength = -1; + for (i = 0; i < (aSize - 1); i++) + { + temp[i] = annotations[i]; + } + } - for (int i = 0; i < sequences.size(); i++) - { - if (getSequenceAt(i).getLength() > maxLength) - { - maxLength = getSequenceAt(i).getLength(); - } - } + annotations = temp; + } - return maxLength; + public void setAnnotationIndex(AlignmentAnnotation aa, int index) + { + if (aa == null || annotations == null || annotations.length - 1 < index) + { + return; } + int aSize = annotations.length; + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; + + temp[index] = aa; - /** - * DOCUMENT ME! - * - * @param gc DOCUMENT ME! - */ - public void setGapCharacter(char gc) + for (int i = 0; i < aSize; i++) { - gapCharacter = gc; + if (i == index) + { + continue; + } - for (int i = 0; i < sequences.size(); i++) - { - Sequence seq = (Sequence) sequences.elementAt(i); - seq.setSequence( seq.getSequence().replace('.', gc) ); - seq.setSequence( seq.getSequence().replace('-', gc) ); - seq.setSequence( seq.getSequence().replace(' ', gc) ); - } + if (i < index) + { + temp[i] = annotations[i]; + } + else + { + temp[i] = annotations[i - 1]; + } } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public char getGapCharacter() + annotations = temp; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public AlignmentAnnotation[] getAlignmentAnnotation() + { + return annotations; + } + + public void setNucleotide(boolean b) + { + if (b) { - return gapCharacter; + type = NUCLEOTIDE; } + else + { + type = PROTEIN; + } + } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public boolean isAligned() + public boolean isNucleotide() + { + if (type == NUCLEOTIDE) + { + return true; + } + else { - int width = getWidth(); + return false; + } + } - for (int i = 0; i < sequences.size(); i++) + public void setDataset(Alignment data) + { + if (dataset == null && data == null) + { + // Create a new dataset for this alignment. + // Can only be done once, if dataset is not null + // This will not be performed + SequenceI[] seqs = new SequenceI[getHeight()]; + SequenceI currentSeq; + for (int i = 0; i < getHeight(); i++) + { + currentSeq = getSequenceAt(i); + if (currentSeq.getDatasetSequence() != null) { - if (getSequenceAt(i).getLength() != width) - { - return false; - } + seqs[i] = (Sequence) currentSeq.getDatasetSequence(); + } + else + { + seqs[i] = currentSeq.createDatasetSequence(); } + } - return true; + dataset = new Alignment(seqs); } - - /** - * DOCUMENT ME! - * - * @param aa DOCUMENT ME! - */ - public void deleteAnnotation(AlignmentAnnotation aa) + else if (dataset == null && data != null) { - int aSize = 1; + dataset = data; + } + dataset.addAlignmentRef(); + } - if (annotations != null) - { - aSize = annotations.length; - } + /** + * reference count for number of alignments referencing this one. + */ + int alignmentRefs = 0; + + /** + * increase reference count to this alignment. + */ + private void addAlignmentRef() + { + alignmentRefs++; + } - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; + public Alignment getDataset() + { + return dataset; + } - int tIndex = 0; + public boolean padGaps() + { + boolean modified = false; - for (int i = 0; i < aSize; i++) - { - if (annotations[i] == aa) - { - continue; - } + // Remove excess gaps from the end of alignment + int maxLength = -1; - temp[tIndex] = annotations[i]; - tIndex++; + SequenceI current; + for (int i = 0; i < sequences.size(); i++) + { + current = getSequenceAt(i); + for (int j = current.getLength(); j > maxLength; j--) + { + if (j > maxLength + && !jalview.util.Comparison.isGap(current.getCharAt(j))) + { + maxLength = j; + break; } - - annotations = temp; + } } + maxLength++; + + int cLength; + for (int i = 0; i < sequences.size(); i++) + { + current = getSequenceAt(i); + cLength = current.getLength(); + + if (cLength < maxLength) + { + current.insertCharAt(cLength, maxLength - cLength, gapCharacter); + modified = true; + } + else if (current.getLength() > maxLength) + { + current.deleteChars(maxLength, current.getLength()); + } + } + return modified; + } + /** + * Justify the sequences to the left or right by deleting and inserting gaps before the initial residue or after the terminal residue + * @param right true if alignment padded to right, false to justify to left + * @return true if alignment was changed + */ + public boolean justify(boolean right) + { + boolean modified = false; - public void adjustSequenceAnnotations() + // Remove excess gaps from the end of alignment + int maxLength = -1; + int ends[] = new int[sequences.size()*2]; + SequenceI current; + for (int i = 0; i < sequences.size(); i++) { - if(annotations!=null) + current = getSequenceAt(i); + // This should really be a sequence method + ends[i*2] = current.findIndex(current.getStart()); + ends[i*2+1] = current.findIndex(current.getStart()+current.getLength()); + boolean hitres=false; + for (int j = 0,rs=0,ssiz=current.getLength(); jmaxLength) + { + maxLength = j-ends[i*2]; + } } } } } - /** - * DOCUMENT ME! - * - * @param aa DOCUMENT ME! - */ - public void addAnnotation(AlignmentAnnotation aa) + maxLength++; + // now edit the flanking gaps to justify to either left or right + int cLength,extent,diff; + for (int i = 0; i < sequences.size(); i++) { - int aSize = 1; - if (annotations != null) + current = getSequenceAt(i); + + cLength = 1+ends[i*2+1]-ends[i*2]; + diff = maxLength-cLength; // number of gaps to indent + extent = current.getLength(); + if (right) + { + // right justify + if (extent>ends[i*2+1]) { - aSize = annotations.length + 1; + current.deleteChars(ends[i*2+1]+1, extent); + modified = true; } - - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; - - temp[aSize-1] = aa; - - int i = 0; - - if (aSize > 1) + if (ends[i*2]>diff) { - for (i = 0; i < (aSize-1); i++) - { - temp[i] = annotations[i]; - } + current.deleteChars(0, ends[i*2]-diff); + modified = true; + } else { + if (ends[i*2]0) + { + current.deleteChars(0, ends[i*2]); + modified = true; + ends[i*2+1]-=ends[i*2]; + extent-=ends[i*2]; + } + if (extent>maxLength) + { + current.deleteChars(maxLength+1, extent); + modified = true; + } else { + if (extent