X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=b0b04cae8739551cd24ee5b045d62d5391d81b01;hb=1ed150e97b6a7df6ebe5cf92ead96cd147a58b7e;hp=dd02bc12626f3b396b3109ee24a6a0aa92747c9d;hpb=2cf0e6316cf8b4e545bad1648966b02fad49017f;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index dd02bc1..b0b04ca 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,481 +1,857 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.datamodel; import jalview.analysis.*; + import jalview.util.*; + import java.util.*; /** Data structure to hold and manipulate a multiple sequence alignment */ public class Alignment implements AlignmentI { + protected Alignment dataset; + protected Vector sequences; + protected Vector groups = new Vector(); + protected Vector superGroup = new Vector(); + protected char gapCharacter = '-'; + protected int type = NUCLEOTIDE; + public static final int PROTEIN = 0; + public static final int NUCLEOTIDE = 1; + + /** DOCUMENT ME!! */ + public AlignmentAnnotation[] annotations; + + HiddenSequences hiddenSequences = new HiddenSequences(this); + + + /** Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + int i=0; - protected Vector sequences; - protected Vector groups = new Vector(); - protected char gapCharacter = '-'; + if( jalview.util.Comparison.isNucleotide(seqs)) + type = NUCLEOTIDE; + else + type = PROTEIN; - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) { - sequences = new Vector(); + sequences = new Vector(); - for (int i=0; i < seqs.length; i++) - sequences.addElement(seqs[i]); + for (i = 0; i < seqs.length; i++) + { + sequences.addElement(seqs[i]); - getWidth(); - } + if(seqs[i].getDatasetSequence()!=null + && seqs[i].getDatasetSequence().getAnnotation()!=null) + { - public Vector getSequences() { - return sequences; - } + for(int a=0; a i) + { + /* MC Should move this to a method somewhere */ + if (!jalview.util.Comparison.isGap(current.getCharAt(i))) + { + delete = false; + } + } + } + + if (delete) + { + deleteColumns(i, i); + iSize--; + i--; + } + } + } - for (int i=groups.size()-1; i >= 0; i--) { - newg.addElement(s[i]); + /** Removes a range of columns (start to end inclusive). + * + * @param start Start column in the alignment + * @param end End column in the alignment + */ + public void deleteColumns(int start, int end) + { + deleteColumns(0, getHeight() - 1, start, end); } - groups = newg; - } + /** + * DOCUMENT ME! + * + * @param seq1 DOCUMENT ME! + * @param seq2 DOCUMENT ME! + * @param start DOCUMENT ME! + * @param end DOCUMENT ME! + */ + public void deleteColumns(int seq1, int seq2, int start, int end) + { + for (int i = 0; i <= (end - start); i++) + { + for (int j = seq1; j <= seq2; j++) + { + getSequenceAt(j).deleteCharAt(start); + } + } + } - /** Takes out columns consisting entirely of gaps (-,.," ") - */ - public void removeGaps() - { + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + public void trimLeft(int i) + { + int j, jSize = getHeight(); + for (j = 0; j < jSize; j++) + { + SequenceI s = getSequenceAt(j); + int newstart = s.findPosition(i); + + if(i>s.getLength()) + { + sequences.removeElement(s); + j--; + jSize--; + } + else + { + s.setStart(newstart); + s.setSequence(s.getSequence().substring(i)); + } + } + } - SequenceI current; - int iSize = getWidth(); - for (int i=0; i < iSize; i++) + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + public void trimRight(int i) { - boolean delete = true; - for (int j=0; j < getHeight(); j++) - { - current = getSequenceAt(j); - if (current.getLength() > i) + for (int j = 0; j < getHeight(); j++) { - /* MC Should move this to a method somewhere */ - if ( !jalview.util.Comparison.isGap(current.getCharAt(i))) - delete = false; + SequenceI s = getSequenceAt(j); + int newend = s.findPosition(i); + s.setEnd(newend); + if(s.getLength()>i) + s.setSequence(s.getSequence().substring(0, i + 1)); } - } + } - if ( delete ) - { - deleteColumns(i,i); - iSize--; - i--; - } + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + */ + public void deleteSequence(SequenceI s) + { + for (int i = 0; i < getHeight(); i++) + { + if (getSequenceAt(i) == s) + { + deleteSequence(i); + } + } } + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + public void deleteSequence(int i) + { + sequences.removeElementAt(i); + } - } + /** + * DOCUMENT ME! + * + * @param threshold DOCUMENT ME! + * @param sel DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector removeRedundancy(float threshold, Vector sel) + { + Vector del = new Vector(); - /** Returns an array of Sequences containing columns - * start to end (inclusive) only. - * - * @param start start column to fetch - * @param end end column to fetch - * @return Array of Sequences, ready to put into a new Alignment - */ - public SequenceI[] getColumns(int start, int end) { - return getColumns(0,getHeight()-1,start,end); - } + for (int i = 1; i < sel.size(); i++) + { + for (int j = 0; j < i; j++) + { + // Only do the comparison if either have not been deleted + if (!del.contains((SequenceI) sel.elementAt(i)) || + !del.contains((SequenceI) sel.elementAt(j))) + { + // use PID instead of Comparison (which is really not pleasant) + float pid = Comparison.PID((SequenceI) sel.elementAt(j), + (SequenceI) sel.elementAt(i)); + + if (pid >= threshold) + { + // Delete the shortest one + if (((SequenceI) sel.elementAt(j)).getSequence().length() > ((SequenceI) sel + .elementAt( + i)).getSequence().length()) + { + del.addElement(sel.elementAt(i)); + } + else + { + del.addElement(sel.elementAt(i)); + } + } + } + } + } - /** Removes a range of columns (start to end inclusive). - * - * @param start Start column in the alignment - * @param end End column in the alignment - */ - public void deleteColumns(int start, int end) { - deleteColumns(0,getHeight()-1,start,end); - } + // Now delete the sequences + for (int i = 0; i < del.size(); i++) + { + deleteSequence((SequenceI) del.elementAt(i)); + } - public void deleteColumns(int seq1, int seq2, int start, int end) { + return del; + } - for (int i=0; i <= (end-start); i++) { - for (int j=seq1; j <= seq2; j++) { - getSequenceAt(j).deleteCharAt(start); - } + /** */ + public SequenceGroup findGroup(int i) + { + return findGroup(getSequenceAt(i)); } - } - public void insertColumns(SequenceI[] seqs, int pos) { - if (seqs.length == getHeight()) { - for (int i=0; i < getHeight();i++) { - String tmp = new String(getSequenceAt(i).getSequence()); - getSequenceAt(i).setSequence(tmp.substring(0,pos) + seqs[i].getSequence() + tmp.substring(pos)); - } + /** */ + public SequenceGroup findGroup(SequenceI s) + { + for (int i = 0; i < this.groups.size(); i++) + { + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - } - } + if (sg.sequences.contains(s)) + { + return sg; + } + } - public SequenceI[] getColumns(int seq1, int seq2, int start, int end) { - SequenceI[] seqs = new Sequence[(seq2-seq1)+1]; - for (int i=seq1; i<= seq2; i++ ) { - seqs[i] = new Sequence(getSequenceAt(i).getName(), - getSequenceAt(i).getSequence().substring(start,end), - getSequenceAt(i).findPosition(start), - getSequenceAt(i).findPosition(end)); + return null; } - return seqs; - } - public void trimLeft(int i) { - for (int j = 0;j< getHeight();j++) { + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceGroup[] findAllGroups(SequenceI s) + { + Vector temp = new Vector(); + + int gSize = groups.size(); + for (int i = 0; i < gSize; i++) + { + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + if(sg==null || sg.sequences==null) + { + this.deleteGroup(sg); + gSize--; + continue; + } + + if (sg.sequences.contains(s)) + { + temp.addElement(sg); + } + } - SequenceI s = getSequenceAt(j); - int newstart = s.findPosition(i); + SequenceGroup[] ret = new SequenceGroup[temp.size()]; - s.setStart(newstart); - s.setSequence(s.getSequence().substring(i)); + for (int i = 0; i < temp.size(); i++) + { + ret[i] = (SequenceGroup) temp.elementAt(i); + } + return ret; } - } - public void trimRight(int i) { - for (int j = 0;j< getHeight();j++) { - SequenceI s = getSequenceAt(j); - int newend = s.findPosition(i); - s.setEnd(newend); - s.setSequence(s.getSequence().substring(0,i+1)); - } - } - public void deleteSequence(SequenceI s) - { - for (int i=0; i < getHeight(); i++) - if (getSequenceAt(i) == s) - deleteSequence(i); - } + /** */ + public void addGroup(SequenceGroup sg) + { + if (!groups.contains(sg)) + { + groups.addElement(sg); + } + } - public void deleteSequence(int i) - { - sequences.removeElementAt(i); - } + /** + * DOCUMENT ME! + */ + public void deleteAllGroups() + { + groups.removeAllElements(); + superGroup.removeAllElements(); + int i = 0; - public Vector removeRedundancy(float threshold, Vector sel) { - Vector del = new Vector(); + while (i < sequences.size()) + { + SequenceI s = getSequenceAt(i); + s.setColor(java.awt.Color.white); + i++; + } + } - for (int i = 1; i < sel.size(); i++) + /** */ + public void deleteGroup(SequenceGroup g) { - for (int j = 0; j < i; j++) - { - // Only do the comparison if either have not been deleted - if (!del.contains( (SequenceI) sel.elementAt(i)) || - !del.contains( (SequenceI) sel.elementAt(j))) + if (groups.contains(g)) { + groups.removeElement(g); + } + } - float pid = Comparison.compare( (SequenceI) sel.elementAt(j), - (SequenceI) sel.elementAt(i)); + /** */ + public SequenceI findName(String name) + { + int i = 0; - if (pid >= threshold) - { - // Delete the shortest one - if ( ( (SequenceI) sel.elementAt(j)).getSequence().length() > - ( (SequenceI) sel.elementAt(i)).getSequence().length()) - del.addElement(sel.elementAt(i)); - else - del.addElement(sel.elementAt(i)); - } + while (i < sequences.size()) + { + if (getSequenceAt(i).getName().equals(name)) + { + return getSequenceAt(i); + } + + i++; } - } + + return null; } - // Now delete the sequences - for (int i=0; i < del.size(); i++) - deleteSequence((SequenceI)del.elementAt(i)); - return del; - } + /** */ + public int findIndex(SequenceI s) + { + int i = 0; - public void sortByPID(SequenceI s) { + while (i < sequences.size()) + { + if (s == getSequenceAt(i)) + { + return i; + } - float scores[] = new float[getHeight()]; - SequenceI seqs[] = new SequenceI[getHeight()]; + i++; + } - for (int i = 0; i < getHeight(); i++) { - scores[i] = Comparison.compare(getSequenceAt(i),s); - seqs[i] = getSequenceAt(i); + return -1; } - QuickSort.sort(scores,0,scores.length-1,seqs); + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getHeight() + { + return sequences.size(); + } - int len = 0; + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getWidth() + { + int maxLength = -1; - if (getHeight()%2 == 0) { - len = getHeight()/2; - } else { - len = (getHeight()+1)/2; - } + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() > maxLength) + { + maxLength = getSequenceAt(i).getLength(); + } + } - for (int i = 0; i < len; i++) { - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight()-i-1],i); - sequences.setElementAt(tmp,getHeight()-i-1); + return maxLength; } - } - public void sortByID() { - String ids[] = new String[getHeight()]; - SequenceI seqs[] = new SequenceI[getHeight()]; + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getMaxIdLength() + { + int max = 0; + int i = 0; - for (int i = 0; i < getHeight(); i++) { - ids[i] = getSequenceAt(i).getName(); - seqs[i] = getSequenceAt(i); - } + while (i < sequences.size()) + { + SequenceI seq = getSequenceAt(i); + String tmp = seq.getName() + "/" + seq.getStart() + "-" + + seq.getEnd(); - QuickSort.sort(ids,seqs); + if (tmp.length() > max) + { + max = tmp.length(); + } - int len = 0; + i++; + } - if (getHeight()%2 == 0) { - len = getHeight()/2; - } else { - len = (getHeight()+1)/2; - System.out.println("Sort len is odd = " + len); - } - for (int i = 0; i < len; i++) { - System.out.println("Swapping " + seqs[i].getName() + " and " + seqs[getHeight()-i-1].getName()); - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight()-i-1],i); - sequences.setElementAt(tmp,getHeight()-i-1); + return max; } - } - /** */ - public SequenceGroup findGroup(int i) { - return findGroup(getSequenceAt(i)); - } - - /** */ - public SequenceGroup findGroup(SequenceI s) { - for (int i = 0; i < this.groups.size();i++) + /** + * DOCUMENT ME! + * + * @param gc DOCUMENT ME! + */ + public void setGapCharacter(char gc) { - SequenceGroup sg = (SequenceGroup)groups.elementAt(i); - if (sg.sequences.contains(s)) - return sg; + gapCharacter = gc; + for (int i = 0; i < sequences.size(); i++) + { + Sequence seq = (Sequence) sequences.elementAt(i); + seq.sequence = seq.sequence.replace('.', gc); + seq.sequence = seq.sequence.replace('-', gc); + seq.sequence = seq.sequence.replace(' ', gc); + } } - return null; - } - public SequenceGroup [] findAllGroups(SequenceI s) - { + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public char getGapCharacter() + { + return gapCharacter; + } - ArrayList temp = new ArrayList(); + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getAAFrequency() + { + return AAFrequency.calculate(sequences, 0, getWidth()); + } - for (int i = 0; i < this.groups.size();i++) + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean isAligned() { - SequenceGroup sg = (SequenceGroup)groups.elementAt(i); - if (sg.sequences.contains(s)) - temp.add(sg); + int width = getWidth(); + + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() != width) + { + return false; + } + } + + return true; } - SequenceGroup [] ret = new SequenceGroup[temp.size()]; - temp.toArray( ret ); + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void deleteAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + + if (annotations != null) + { + aSize = annotations.length; + } + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; + + int tIndex = 0; - return ret; + for (int i = 0; i < aSize; i++) + { + if (annotations[i] == aa) + { + continue; + } - } - /** */ - public void addToGroup(SequenceGroup g, SequenceI s) { - if (!(g.sequences.contains(s))) { - g.sequences.addElement(s); + temp[tIndex] = annotations[i]; + tIndex++; + } + + annotations = temp; } - } - /** */ - public void removeFromGroup(SequenceGroup g,SequenceI s) { - if (g != null && g.sequences != null) { - if (g.sequences.contains(s)) { - g.sequences.removeElement(s); - if (g.sequences.size() == 0) { - groups.removeElement(g); + + /** + * + * @param aa AlignmentAnnotation + * @param seqRef The sequence to associate this annotation with + * @return The adjusted AlignmentAnnotation, with dataset sequence and annotation added + */ + public AlignmentAnnotation addAnnotation(AlignmentAnnotation aa, SequenceI seqRef) + { + if(seqRef!=null) + { + //We can only add Annotations to the dataset sequences + if(seqRef.getDatasetSequence()==null) + { + setDataset(null); + } + + AlignmentAnnotation [] old = seqRef.getDatasetSequence().getAnnotation(); + + //First check if this is a new annotation or not. If it is new, + //we must add the annotation to the dataset + boolean newAnnotation = true; + if(seqRef.getDatasetSequence().getAnnotation()!=null) + { + for(int a=0; a 0) + copy = new AlignmentAnnotation( + aa.label, aa.description, aa.annotations, aa.graphMin, + aa.graphMax, aa.graph + ); + else + copy = new AlignmentAnnotation( + aa.label, aa.description, aa.annotations + ); + + copy.datasetAnnotation = aa; + + addAnnotation(copy); + + copy.sequenceRef = seqRef; + + return copy; + } + else + { + addAnnotation(aa); + return aa; } } - } - /** */ - public void addGroup(SequenceGroup sg) { - if(!groups.contains(sg)) - groups.addElement(sg); - } + public void adjustSequenceAnnotations() + { + if(annotations!=null) + { + for (int a = 0; a < annotations.length; a++) + { + if (annotations[a].sequenceRef != null) + { + annotations[a].adjustForAlignment(); + } + } + } + } - /** */ - public SequenceGroup addGroup() { - SequenceGroup sg = new SequenceGroup(); - groups.addElement(sg); - return sg; - } + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void addAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + if (annotations != null) + { + aSize = annotations.length + 1; + } - /** */ - public void deleteGroup(SequenceGroup g) { - if (groups.contains(g)) { - groups.removeElement(g); - } - } + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; - /** */ - public SequenceI findName(String name) { - int i = 0; - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - if (s.getName().equals(name)) - return s; + temp[aSize-1] = aa; - i++; - } - return null; - } + int i = 0; - /** */ - public SequenceI findbyDisplayId(String name) { - int i = 0; - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - if (s.getDisplayId().equals(name)) - return s; + if (aSize > 1) + { + for (i = 0; i < (aSize-1); i++) + { + temp[i] = annotations[i]; + } + } - i++; + annotations = temp; } - return null; - } - /** */ - public int findIndex(SequenceI s) - { - int i=0; - while (i < sequences.size()) + public void setAnnotationIndex(AlignmentAnnotation aa, int index) { - if (s == getSequenceAt(i)) - return i; + if(aa==null || annotations==null || annotations.length-1 maxLength) - maxLength = getSequenceAt(i).getLength(); + return annotations; } - return maxLength; - } + public void setNucleotide(boolean b) + { + if(b) + type = NUCLEOTIDE; + else + type = PROTEIN; + } + public boolean isNucleotide() + { + if(type==NUCLEOTIDE) + return true; + else + return false; + } + + public void setDataset(Alignment data) + { + if(dataset==null && data==null) + { + // Create a new dataset for this alignment. + // Can only be done once, if dataset is not null + // This will not be performed + Sequence[] seqs = new Sequence[getHeight()]; + for (int i = 0; i < getHeight(); i++) + { - public int getMaxIdLength() { - int max = 0; - int i = 0; + seqs[i] = new Sequence(getSequenceAt(i).getName(), + AlignSeq.extractGaps( + jalview.util.Comparison.GapChars, + getSequenceAt(i).getSequence() + ), + getSequenceAt(i).getStart(), + getSequenceAt(i).getEnd()); - while (i < sequences.size()) { - SequenceI seq = getSequenceAt(i); - String tmp = seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd(); + getSequenceAt(i).setDatasetSequence(seqs[i]); + } - if (tmp.length() > max) { - max = tmp.length(); + dataset = new Alignment(seqs); + } + else if(dataset==null && data!=null) + { + dataset = data; } + } - i++; - } - return max; - } - - public void setGapCharacter(char gc) - { - char old = getGapCharacter(); - gapCharacter = gc; - for (int i=0; i < sequences.size(); i++) - { - Sequence seq = (Sequence)sequences.elementAt(i); - seq.sequence = seq.sequence.replace(old, gc); - } - } - - public char getGapCharacter() { - return gapCharacter; - } - - public Vector getAAFrequency() - { - return AAFrequency.calculate(sequences, 0, getWidth()); - } -} + public Alignment getDataset() + { + return dataset; + } + public boolean padGaps() { + boolean modified=false; + //Remove excess gaps from the end of alignment + int maxLength = -1; + SequenceI current; + for (int i = 0; i < sequences.size(); i++) + { + current = getSequenceAt(i); + for (int j = current.getLength(); j > maxLength; j--) + { + if (j > maxLength && !jalview.util.Comparison.isGap( + current.getCharAt(j))) + { + maxLength = j; + break; + } + } + } + maxLength++; + for (int i = 0; i < sequences.size(); + i++) + { + current = getSequenceAt(i); + if (current.getLength() < maxLength) + { + current.insertCharAt(maxLength - 1, gapCharacter); + modified=true; + } + else if(current.getLength() > maxLength) + { + current.deleteChars(maxLength, current.getLength()); + } + } + return modified; + } + public HiddenSequences getHiddenSequences() + { + return hiddenSequences; + } +}