X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=f36872ee11724358c02b5a46b5ad4372271073a5;hb=a4fca83856b96df2fe8701bb5cc14c8d1d0a6b09;hp=86c140f0a8071566edb454d883f52c1e3b0caa0b;hpb=12aa3352bd2981b8aa86d9d28e4c405fa56a2c29;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 86c140f..f36872e 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,825 +18,807 @@ */ package jalview.datamodel; -import jalview.analysis.*; - -import jalview.util.*; - import java.util.*; +import jalview.analysis.*; + /** Data structure to hold and manipulate a multiple sequence alignment */ -public class Alignment implements AlignmentI +public class Alignment + implements AlignmentI { - protected Alignment dataset; - protected Vector sequences; - protected Vector groups = new Vector(); - protected char gapCharacter = '-'; - protected int type = NUCLEOTIDE; - public static final int PROTEIN = 0; - public static final int NUCLEOTIDE = 1; + protected Alignment dataset; + protected Vector sequences; + protected Vector groups = new Vector(); + protected char gapCharacter = '-'; + protected int type = NUCLEOTIDE; + public static final int PROTEIN = 0; + public static final int NUCLEOTIDE = 1; - /** DOCUMENT ME!! */ - public AlignmentAnnotation[] annotations; + /** DOCUMENT ME!! */ + public AlignmentAnnotation[] annotations; - HiddenSequences hiddenSequences = new HiddenSequences(this); + HiddenSequences hiddenSequences = new HiddenSequences(this); - private void initAlignment(SequenceI[] seqs) { - int i=0; + public Hashtable alignmentProperties; - if( jalview.util.Comparison.isNucleotide(seqs)) - type = NUCLEOTIDE; - else - type = PROTEIN; - - sequences = new Vector(); - - for (i = 0; i < seqs.length; i++) - { - sequences.addElement(seqs[i]); - } + private void initAlignment(SequenceI[] seqs) + { + int i = 0; + if (jalview.util.Comparison.isNucleotide(seqs)) + { + type = NUCLEOTIDE; } - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) + else { - initAlignment(seqs); + type = PROTEIN; } - /** - * Make a new alignment from an array of SeqCigars - * @param seqs SeqCigar[] - */ - public Alignment(SeqCigar[] alseqs) { - SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null); - initAlignment(seqs); + + sequences = new Vector(); + + for (i = 0; i < seqs.length; i++) + { + sequences.addElement(seqs[i]); } - /** - * Make a new alignment from an CigarArray - * JBPNote - can only do this when compactAlignment does not contain hidden regions. - * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. - * @param compactAlignment CigarArray - */ - public static AlignmentI createAlignment(CigarArray compactAlignment) { - throw new Error("Alignment(CigarArray) not yet implemented"); - // this(compactAlignment.refCigars); + + } + + /** Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + initAlignment(seqs); + } + + /** + * Make a new alignment from an array of SeqCigars + * @param seqs SeqCigar[] + */ + public Alignment(SeqCigar[] alseqs) + { + SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, + new ColumnSelection(), null); + initAlignment(seqs); + } + + /** + * Make a new alignment from an CigarArray + * JBPNote - can only do this when compactAlignment does not contain hidden regions. + * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. + * @param compactAlignment CigarArray + */ + public static AlignmentI createAlignment(CigarArray compactAlignment) + { + throw new Error("Alignment(CigarArray) not yet implemented"); + // this(compactAlignment.refCigars); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getSequences() + { + return sequences; + } + + public SequenceI[] getSequencesArray() + { + if (sequences==null) + return null; + SequenceI[] reply = new SequenceI[sequences.size()]; + for (int i = 0; i < sequences.size(); i++) + { + reply[i] = (SequenceI) sequences.elementAt(i); } + return reply; + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getSequences() + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceI getSequenceAt(int i) + { + if (i < sequences.size()) { - return sequences; + return (SequenceI) sequences.elementAt(i); } - public SequenceI [] getSequencesArray() + return null; + } + + /** Adds a sequence to the alignment. Recalculates maxLength and size. + * + * @param snew + */ + public void addSequence(SequenceI snew) + { + if (dataset != null) { - SequenceI [] reply = new SequenceI[sequences.size()]; - for(int i=0; i -1 && i < getHeight()) + { + sequences.removeElementAt(i); + hiddenSequences.adjustHeightSequenceDeleted(i); } + } - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void addSequence(SequenceI snew) + /** */ + public SequenceGroup findGroup(SequenceI s) + { + for (int i = 0; i < this.groups.size(); i++) { - if(dataset!=null) + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + + if (sg.getSequences(null).contains(s)) { - if(snew.getDatasetSequence()!=null) - { - System.out.println(snew.getName()); - getDataset().addSequence(snew.getDatasetSequence()); - } - else - { - Sequence ds = new Sequence(snew.getName(), - AlignSeq.extractGaps("-. ", - snew.getSequence()), - snew.getStart(), - snew.getEnd()); - - snew.setDatasetSequence(ds); - getDataset().addSequence(ds); - } + return sg; } - - sequences.addElement(snew); } + return null; + } + + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceGroup[] findAllGroups(SequenceI s) + { + Vector temp = new Vector(); - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void setSequenceAt(int i, SequenceI snew) + int gSize = groups.size(); + for (int i = 0; i < gSize; i++) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(oldseq); + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + if (sg == null || sg.getSequences(null) == null) + { + this.deleteGroup(sg); + gSize--; + continue; + } - sequences.setElementAt(snew, i); + if (sg.getSequences(null).contains(s)) + { + temp.addElement(sg); + } } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getGroups() + SequenceGroup[] ret = new SequenceGroup[temp.size()]; + + for (int i = 0; i < temp.size(); i++) { - return groups; + ret[i] = (SequenceGroup) temp.elementAt(i); } - /** Takes out columns consisting entirely of gaps (-,.," ") - */ - public void removeGaps() - { - SequenceI[] seqs = getVisibleAndRepresentedSeqs(); - int j, jSize = seqs.length; + return ret; + } - int width = 0; - for (int i = 0; i < jSize; i++) + /** */ + public void addGroup(SequenceGroup sg) + { + if (!groups.contains(sg)) + { + if (hiddenSequences.getSize() > 0) + { + int i, iSize = sg.getSize(); + for (i = 0; i < iSize; i++) { - if (seqs[i].getLength() > width) + if (!sequences.contains(sg.getSequenceAt(i))) { - width = seqs[i].getLength(); + sg.deleteSequence(sg.getSequenceAt(i), false); + iSize--; + i--; } } - int startCol = -1, endCol = -1; - boolean delete = true; - for (int i = 0; i < width; i++) + if (sg.getSize() < 1) { - delete = true; - - for (j = 0; j < jSize; j++) - { - if (seqs[j].getLength() > i) - { - if (!jalview.util.Comparison.isGap(seqs[j].getCharAt(i))) - { - if(delete) - endCol = i; - - delete = false; - break; - } - } - } - - if(delete && startCol==-1) - { - startCol = i; - } - - - if (!delete && startCol > -1) - { - deleteColumns(seqs, startCol, endCol); - width -= (endCol - startCol); - i -= (endCol - startCol); - startCol = -1; - endCol = -1; - } + return; } + } - if (delete && startCol > -1) - { - deleteColumns(seqs, startCol, endCol); - } + groups.addElement(sg); } + } - /** Removes a range of columns (start to end inclusive). - * - * @param seqs Sequences to remove columns from - * @param start Start column in the alignment - * @param end End column in the alignment - */ - public void deleteColumns(SequenceI [] seqs, int start, int end) + /** + * DOCUMENT ME! + */ + public void deleteAllGroups() + { + groups.removeAllElements(); + } + + /** */ + public void deleteGroup(SequenceGroup g) + { + if (groups.contains(g)) { - for(int i=0; iseqs[j].getLength()) - { - sequences.removeElement(seqs[j]); - j--; - jSize--; - } - else - { - seqs[j].setStart(newstart); - seqs[j].setSequence(seqs[j].getSequence().substring(i)); - } - } + if (getSequenceAt(i).getName().equals(name)) + { + return getSequenceAt(i); + } + + i++; } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - */ - public void trimRight(int i) - { - SequenceI[] seqs = getVisibleAndRepresentedSeqs(); - int j, jSize = seqs.length; - for (j = 0; j < jSize; j++) - { - int newend = seqs[j].findPosition(i); + return null; + } - seqs[j].setEnd(newend); - if(seqs[j].getLength()>i) - seqs[j].setSequence(seqs[j].getSequence().substring(0, i + 1)); - } - } + public SequenceI[] findSequenceMatch(String name) + { + Vector matches = new Vector(); + int i = 0; - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - */ - public void deleteSequence(SequenceI s) + while (i < sequences.size()) { - for (int i = 0; i < getHeight(); i++) - { - if (getSequenceAt(i) == s) - { - deleteSequence(i); - } - } + if (getSequenceAt(i).getName().equals(name)) + { + matches.addElement(getSequenceAt(i)); + } + i++; } - /** - * DOCUMENT ME! - * - * @param i DOCUMENT ME! - */ - public void deleteSequence(int i) + SequenceI[] result = new SequenceI[matches.size()]; + for (i = 0; i < result.length; i++) { - sequences.removeElementAt(i); + result[i] = (SequenceI) matches.elementAt(i); } + return result; - /** */ - public SequenceGroup findGroup(SequenceI s) - { - for (int i = 0; i < this.groups.size(); i++) - { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - - if (sg.getSequences(false).contains(s)) - { - return sg; - } - } + } - return null; - } + /** */ + public int findIndex(SequenceI s) + { + int i = 0; - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceGroup[] findAllGroups(SequenceI s) + while (i < sequences.size()) { - Vector temp = new Vector(); + if (s == getSequenceAt(i)) + { + return i; + } - int gSize = groups.size(); - for (int i = 0; i < gSize; i++) - { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - if(sg==null || sg.getSequences(false)==null) - { - this.deleteGroup(sg); - gSize--; - continue; - } - - if (sg.getSequences(false).contains(s)) - { - temp.addElement(sg); - } - } + i++; + } - SequenceGroup[] ret = new SequenceGroup[temp.size()]; + return -1; + } - for (int i = 0; i < temp.size(); i++) - { - ret[i] = (SequenceGroup) temp.elementAt(i); - } + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getHeight() + { + return sequences.size(); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getWidth() + { + int maxLength = -1; - return ret; + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() > maxLength) + { + maxLength = getSequenceAt(i).getLength(); + } } + return maxLength; + } + /** + * DOCUMENT ME! + * + * @param gc DOCUMENT ME! + */ + public void setGapCharacter(char gc) + { + gapCharacter = gc; - /** */ - public void addGroup(SequenceGroup sg) + for (int i = 0; i < sequences.size(); i++) { - if (!groups.contains(sg)) - { - groups.addElement(sg); - } + Sequence seq = (Sequence) sequences.elementAt(i); + seq.setSequence(seq.getSequenceAsString() + .replace('.', gc) + .replace('-', gc) + .replace(' ', gc) + ); } + } - /** - * DOCUMENT ME! - */ - public void deleteAllGroups() - { - groups.removeAllElements(); + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public char getGapCharacter() + { + return gapCharacter; + } - int i = 0; + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean isAligned() + { + int width = getWidth(); - while (i < sequences.size()) - { - Sequence s = (Sequence)getSequenceAt(i); - s.hiddenSequences = null; - s.setColor(java.awt.Color.white); - i++; - } + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() != width) + { + return false; + } } - /** */ - public void deleteGroup(SequenceGroup g) + return true; + } + /* (non-Javadoc) + * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.AlignmentAnnotation) + */ + public boolean deleteAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + + if (annotations != null) { - if (groups.contains(g)) - { - //remove any hidden representatives - // for(int i=0; i maxLength) - { - maxLength = getSequenceAt(i).getLength(); - } - } + temp[aSize - 1] = aa; - return maxLength; - } + int i = 0; - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public int getMaxIdLength() + if (aSize > 1) { - int max = 0; - int i = 0; + for (i = 0; i < (aSize - 1); i++) + { + temp[i] = annotations[i]; + } + } - while (i < sequences.size()) - { - SequenceI seq = getSequenceAt(i); - String tmp = seq.getName() + "/" + seq.getStart() + "-" + - seq.getEnd(); + annotations = temp; + } - if (tmp.length() > max) - { - max = tmp.length(); - } + public void setAnnotationIndex(AlignmentAnnotation aa, int index) + { + if (aa == null || annotations == null || annotations.length - 1 < index) + { + return; + } - i++; - } + int aSize = annotations.length; + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; - return max; - } + temp[index] = aa; - /** - * DOCUMENT ME! - * - * @param gc DOCUMENT ME! - */ - public void setGapCharacter(char gc) + for (int i = 0; i < aSize; i++) { - gapCharacter = gc; + if (i == index) + { + continue; + } - for (int i = 0; i < sequences.size(); i++) - { - Sequence seq = (Sequence) sequences.elementAt(i); - seq.setSequence( seq.getSequence().replace('.', gc) ); - seq.setSequence( seq.getSequence().replace('-', gc) ); - seq.setSequence( seq.getSequence().replace(' ', gc) ); - } + if (i < index) + { + temp[i] = annotations[i]; + } + else + { + temp[i] = annotations[i - 1]; + } } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public char getGapCharacter() + annotations = temp; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public AlignmentAnnotation[] getAlignmentAnnotation() + { + return annotations; + } + + public void setNucleotide(boolean b) + { + if (b) { - return gapCharacter; + type = NUCLEOTIDE; } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getAAFrequency() + else { - return AAFrequency.calculate(sequences, 0, getWidth()); + type = PROTEIN; } + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public boolean isAligned() + public boolean isNucleotide() + { + if (type == NUCLEOTIDE) { - int width = getWidth(); - - for (int i = 0; i < sequences.size(); i++) - { - if (getSequenceAt(i).getLength() != width) - { - return false; - } - } - - return true; + return true; } - - /** - * DOCUMENT ME! - * - * @param aa DOCUMENT ME! - */ - public void deleteAnnotation(AlignmentAnnotation aa) + else { - int aSize = 1; + return false; + } + } - if (annotations != null) + public void setDataset(Alignment data) + { + if (dataset == null && data == null) + { + // Create a new dataset for this alignment. + // Can only be done once, if dataset is not null + // This will not be performed + SequenceI[] seqs = new SequenceI[getHeight()]; + SequenceI currentSeq; + for (int i = 0; i < getHeight(); i++) + { + currentSeq = getSequenceAt(i); + if (currentSeq.getDatasetSequence() != null) { - aSize = annotations.length; + seqs[i] = (Sequence) currentSeq.getDatasetSequence(); } - - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; - - int tIndex = 0; - - for (int i = 0; i < aSize; i++) + else { - if (annotations[i] == aa) - { - continue; - } - - temp[tIndex] = annotations[i]; - tIndex++; + seqs[i] = currentSeq.createDatasetSequence(); } + } - annotations = temp; + dataset = new Alignment(seqs); } - - - public void adjustSequenceAnnotations() + else if (dataset == null && data != null) { - if(annotations!=null) - { - for (int a = 0; a < annotations.length; a++) - { - if (annotations[a].sequenceRef != null) - { - annotations[a].adjustForAlignment(); - } - } - } + dataset = data; } + dataset.addAlignmentRef(); + } + /** + * reference count for number of alignments referencing this one. + */ + int alignmentRefs=0; + /** + * increase reference count to this alignment. + */ + private void addAlignmentRef() + { + alignmentRefs++; + } - /** - * DOCUMENT ME! - * - * @param aa DOCUMENT ME! - */ - public void addAnnotation(AlignmentAnnotation aa) - { - int aSize = 1; - if (annotations != null) - { - aSize = annotations.length + 1; - } - - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; + public Alignment getDataset() + { + return dataset; + } - temp[aSize-1] = aa; + public boolean padGaps() + { + boolean modified = false; - int i = 0; + //Remove excess gaps from the end of alignment + int maxLength = -1; - if (aSize > 1) + SequenceI current; + for (int i = 0; i < sequences.size(); i++) + { + current = getSequenceAt(i); + for (int j = current.getLength(); j > maxLength; j--) + { + if (j > maxLength && !jalview.util.Comparison.isGap( + current.getCharAt(j))) { - for (i = 0; i < (aSize-1); i++) - { - temp[i] = annotations[i]; - } + maxLength = j; + break; } - - annotations = temp; + } } - public void setAnnotationIndex(AlignmentAnnotation aa, int index) - { - if(aa==null || annotations==null || annotations.length-1 maxLength) + { + current.deleteChars(maxLength, current.getLength()); } - - annotations = temp; } + return modified; + } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public AlignmentAnnotation[] getAlignmentAnnotation() - { - return annotations; - } + public HiddenSequences getHiddenSequences() + { + return hiddenSequences; + } - public void setNucleotide(boolean b) + public CigarArray getCompactAlignment() + { + SeqCigar alseqs[] = new SeqCigar[sequences.size()]; + for (int i = 0; i < sequences.size(); i++) { - if(b) - type = NUCLEOTIDE; - else - type = PROTEIN; + alseqs[i] = new SeqCigar( (SequenceI) sequences.elementAt(i)); } + CigarArray cal = new CigarArray(alseqs); + cal.addOperation(CigarArray.M, getWidth()); + return cal; + } - public boolean isNucleotide() - { - if(type==NUCLEOTIDE) - return true; - else - return false; - } + public void setProperty(Object key, Object value) + { + if(alignmentProperties==null) + alignmentProperties = new Hashtable(); - public void setDataset(Alignment data) - { - if(dataset==null && data==null) - { - // Create a new dataset for this alignment. - // Can only be done once, if dataset is not null - // This will not be performed - Sequence[] seqs = new Sequence[getHeight()]; - for (int i = 0; i < getHeight(); i++) - { - if(getSequenceAt(i).getDatasetSequence()!=null) - { - seqs[i] = (Sequence)getSequenceAt(i).getDatasetSequence(); - } - else - { - seqs[i] = new Sequence(getSequenceAt(i).getName(), - AlignSeq.extractGaps( - jalview.util.Comparison.GapChars, - getSequenceAt(i).getSequence() - ), - getSequenceAt(i).getStart(), - getSequenceAt(i).getEnd()); - seqs[i].sequenceFeatures = getSequenceAt(i).getSequenceFeatures(); - getSequenceAt(i).setSequenceFeatures(null); - getSequenceAt(i).setDatasetSequence(seqs[i]); - } - } + alignmentProperties.put(key,value); + } - dataset = new Alignment(seqs); - } - else if(dataset==null && data!=null) - { - dataset = data; - } - } + public Object getProperty(Object key) + { + if(alignmentProperties!=null) + return alignmentProperties.get(key); + else + return null; + } - public Alignment getDataset() + public Hashtable getProperties() + { + return alignmentProperties; + } + AlignedCodonFrame[] codonFrameList=null; + /* (non-Javadoc) + * @see jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame) + */ + public void addCodonFrame(AlignedCodonFrame codons) + { + if (codons==null) + return; + if (codonFrameList==null) { - return dataset; + codonFrameList = new AlignedCodonFrame[] { codons }; + return; } + AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length+1]; + System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length); + t[codonFrameList.length] = codons; + codonFrameList = t; + } - public boolean padGaps() { - boolean modified=false; - - //Remove excess gaps from the end of alignment - int maxLength = -1; + /* (non-Javadoc) + * @see jalview.datamodel.AlignmentI#getCodonFrame(int) + */ + public AlignedCodonFrame getCodonFrame(int index) + { + return codonFrameList[index]; + } - SequenceI current; - for (int i = 0; i < sequences.size(); i++) - { - current = getSequenceAt(i); - for (int j = current.getLength(); j > maxLength; j--) - { - if (j > maxLength && !jalview.util.Comparison.isGap( - current.getCharAt(j))) - { - maxLength = j; - break; - } - } - } + /* (non-Javadoc) + * @see jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI) + */ + public AlignedCodonFrame[] getCodonFrame(SequenceI seq) + { + if (seq==null || codonFrameList==null) + return null; + Vector cframes=new Vector(); + for (int f=0;f maxLength) + removed=true; + if (i+1