X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=71e76aff2f6835cc26c329b4a32c23c2cae4fa1c;hb=582d39cb05dfbb5f956f74d4a97a17d9f63b0786;hp=2355a204d79805c16e6f0129c3e6a9f96830251d;hpb=00b702186f1676036db687a8df5d335a4d5da54d;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 2355a20..71e76af 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,565 +1,686 @@ -package jalview.datamodel; - -import jalview.analysis.*; -import jalview.util.*; -import java.util.*; - -/** Data structure to hold and manipulate a multiple sequence alignment - */ -public class Alignment implements AlignmentI -{ - - protected Vector sequences; - protected Vector groups = new Vector(); - protected Vector superGroup = new Vector(); - protected char gapCharacter = '-'; - public AlignmentAnnotation [] annotations; - public Conservation conservation; - - public boolean featuresAdded = false; - - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) { - sequences = new Vector(); - - for (int i=0; i < seqs.length; i++) - sequences.addElement(seqs[i]); - - getWidth(); - } - - public Vector getSequences() { - return sequences; - } - - public SequenceI getSequenceAt(int i) { - if (i < sequences.size()) { - return (SequenceI)sequences.elementAt(i); - } - - return null; - } - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void addSequence(SequenceI snew) { - sequences.addElement(snew); - } - - public void addSequence(SequenceI[] seq) { - for (int i=0; i < seq.length; i++) { - addSequence(seq[i]); - } - } - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void setSequenceAt(int i,SequenceI snew) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(oldseq); - - sequences.setElementAt(snew,i); - } - - public Vector getGroups() { - return groups; - } - - /** Sorts the sequences by sequence group size - largest to smallest. - * Uses QuickSort. - */ - public void sortGroups() { - float[] arr = new float [groups.size()]; - Object[] s = new Object[groups.size()]; - - for (int i=0; i < groups.size(); i++) { - arr[i] = ((SequenceGroup)groups.elementAt(i)).sequences.size(); - s[i] = groups.elementAt(i); - } - - QuickSort.sort(arr,s); - - Vector newg = new Vector(groups.size()); - - for (int i=groups.size()-1; i >= 0; i--) { - newg.addElement(s[i]); - } - - groups = newg; - } - - /** Takes out columns consisting entirely of gaps (-,.," ") - */ - public void removeGaps() - { - - SequenceI current; - int iSize = getWidth(); - for (int i=0; i < iSize; i++) - { - boolean delete = true; - for (int j=0; j < getHeight(); j++) - { - current = getSequenceAt(j); - if (current.getLength() > i) - { - /* MC Should move this to a method somewhere */ - if ( !jalview.util.Comparison.isGap(current.getCharAt(i))) - delete = false; - - } - } - - if ( delete ) - { - deleteColumns(i,i); - iSize--; - i--; - } - } - - - } - - /** Returns an array of Sequences containing columns - * start to end (inclusive) only. - * - * @param start start column to fetch - * @param end end column to fetch - * @return Array of Sequences, ready to put into a new Alignment - */ - public SequenceI[] getColumns(int start, int end) { - return getColumns(0,getHeight()-1,start,end); - } - - /** Removes a range of columns (start to end inclusive). - * - * @param start Start column in the alignment - * @param end End column in the alignment - */ - public void deleteColumns(int start, int end) { - deleteColumns(0,getHeight()-1,start,end); - } - - public void deleteColumns(int seq1, int seq2, int start, int end) { - - for (int i=0; i <= (end-start); i++) { - for (int j=seq1; j <= seq2; j++) { - getSequenceAt(j).deleteCharAt(start); - } - } - } - - public void insertColumns(SequenceI[] seqs, int pos) { - if (seqs.length == getHeight()) { - for (int i=0; i < getHeight();i++) { - String tmp = new String(getSequenceAt(i).getSequence()); - getSequenceAt(i).setSequence(tmp.substring(0,pos) + seqs[i].getSequence() + tmp.substring(pos)); - } - - } - } - - public SequenceI[] getColumns(int seq1, int seq2, int start, int end) { - SequenceI[] seqs = new Sequence[(seq2-seq1)+1]; - for (int i=seq1; i<= seq2; i++ ) { - seqs[i] = new Sequence(getSequenceAt(i).getName(), - getSequenceAt(i).getSequence().substring(start,end), - getSequenceAt(i).findPosition(start), - getSequenceAt(i).findPosition(end)); - } - return seqs; - } - - public void trimLeft(int i) { - for (int j = 0;j< getHeight();j++) { - - SequenceI s = getSequenceAt(j); - int newstart = s.findPosition(i); - - s.setStart(newstart); - s.setSequence(s.getSequence().substring(i)); - - } - } - - public void trimRight(int i) { - for (int j = 0;j< getHeight();j++) { - SequenceI s = getSequenceAt(j); - int newend = s.findPosition(i); - - s.setEnd(newend); - s.setSequence(s.getSequence().substring(0,i+1)); - } - } - - public void deleteSequence(SequenceI s) - { - for (int i=0; i < getHeight(); i++) - if (getSequenceAt(i) == s) - deleteSequence(i); - } - - public void deleteSequence(int i) - { - sequences.removeElementAt(i); - } - - - public Vector removeRedundancy(float threshold, Vector sel) { - Vector del = new Vector(); - - for (int i = 1; i < sel.size(); i++) - { - for (int j = 0; j < i; j++) - { - // Only do the comparison if either have not been deleted - if (!del.contains( (SequenceI) sel.elementAt(i)) || - !del.contains( (SequenceI) sel.elementAt(j))) - { - - float pid = Comparison.compare( (SequenceI) sel.elementAt(j), - (SequenceI) sel.elementAt(i)); - - if (pid >= threshold) - { - // Delete the shortest one - if ( ( (SequenceI) sel.elementAt(j)).getSequence().length() > - ( (SequenceI) sel.elementAt(i)).getSequence().length()) - del.addElement(sel.elementAt(i)); - else - del.addElement(sel.elementAt(i)); - } - } - } - } - - // Now delete the sequences - for (int i=0; i < del.size(); i++) - deleteSequence((SequenceI)del.elementAt(i)); - - return del; - } - - public void sortByPID(SequenceI s) { - - float scores[] = new float[getHeight()]; - SequenceI seqs[] = new SequenceI[getHeight()]; - - for (int i = 0; i < getHeight(); i++) { - scores[i] = Comparison.compare(getSequenceAt(i),s); - seqs[i] = getSequenceAt(i); - } - - QuickSort.sort(scores,0,scores.length-1,seqs); - - int len = 0; - - if (getHeight()%2 == 0) { - len = getHeight()/2; - } else { - len = (getHeight()+1)/2; - } - - for (int i = 0; i < len; i++) { - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight()-i-1],i); - sequences.setElementAt(tmp,getHeight()-i-1); - } - } - - public void sortByID() { - String ids[] = new String[getHeight()]; - SequenceI seqs[] = new SequenceI[getHeight()]; - - for (int i = 0; i < getHeight(); i++) { - ids[i] = getSequenceAt(i).getName(); - seqs[i] = getSequenceAt(i); - } - - QuickSort.sort(ids,seqs); - - int len = 0; - - if (getHeight()%2 == 0) { - len = getHeight()/2; - } else { - len = (getHeight()+1)/2; - System.out.println("Sort len is odd = " + len); - } - for (int i = 0; i < len; i++) { - System.out.println("Swapping " + seqs[i].getName() + " and " + seqs[getHeight()-i-1].getName()); - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight()-i-1],i); - sequences.setElementAt(tmp,getHeight()-i-1); - } - } - - /** */ - public SequenceGroup findGroup(int i) { - return findGroup(getSequenceAt(i)); - } - - /** */ - public SequenceGroup findGroup(SequenceI s) { - for (int i = 0; i < this.groups.size();i++) - { - SequenceGroup sg = (SequenceGroup)groups.elementAt(i); - if (sg.sequences.contains(s)) - return sg; - - } - return null; - } - - public SequenceGroup [] findAllGroups(SequenceI s) - { - - Vector temp = new Vector(); - - for (int i = 0; i < this.groups.size();i++) - { - SequenceGroup sg = (SequenceGroup)groups.elementAt(i); - - if (sg.sequences.contains(s)) - temp.addElement(sg); - } - - SequenceGroup [] ret = new SequenceGroup[temp.size()]; - for(int i=0; i maxLength) - maxLength = getSequenceAt(i).getLength(); - } - - return maxLength; - } - - - public int getMaxIdLength() { - int max = 0; - int i = 0; - - while (i < sequences.size()) { - SequenceI seq = getSequenceAt(i); - String tmp = seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd(); - - if (tmp.length() > max) { - max = tmp.length(); - } - - i++; - } - return max; - } - - public void setGapCharacter(char gc) - { - gapCharacter = gc; - for (int i=0; i < sequences.size(); i++) - { - Sequence seq = (Sequence)sequences.elementAt(i); - seq.sequence = seq.sequence.replace('.', gc); - seq.sequence = seq.sequence.replace('-', gc); - } - } - - public char getGapCharacter() { - return gapCharacter; - } - - public Vector getAAFrequency() - { - return AAFrequency.calculate(sequences, 0, getWidth()); - } - - public boolean isAligned() - { - int width = getWidth(); - for (int i = 0; i < sequences.size(); i++) - if (getSequenceAt(i).getLength() != width) - return false; - - return true; - } - - public void deleteAnnotation(AlignmentAnnotation aa) - { - int aSize = 1; - if(annotations!=null) - aSize = annotations.length; - - AlignmentAnnotation [] temp = new AlignmentAnnotation [aSize-1]; - - int tIndex = 0; - for (int i = 0; i < aSize; i++) - { - if(annotations[i]==aa) - continue; - - - temp[tIndex] = annotations[i]; - tIndex++; - } - - annotations = temp; - - } - - public void addAnnotation(AlignmentAnnotation aa) - { - int aSize = 1; - if(annotations!=null) - aSize = annotations.length+1; - - AlignmentAnnotation [] temp = new AlignmentAnnotation [aSize]; - int i=0; - if (aSize > 1) - for (i = 0; i < aSize-1; i++) - temp[i] = annotations[i]; - - temp[i] = aa; - - annotations = temp; - } - public AlignmentAnnotation[] getAlignmentAnnotation() - { - return annotations; - } - -} - - - - - - - - +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ +package jalview.datamodel; + +import jalview.analysis.*; + +import jalview.util.*; + +import java.util.*; + +/** Data structure to hold and manipulate a multiple sequence alignment + */ +public class Alignment implements AlignmentI +{ + protected Alignment dataset; + protected Vector sequences; + protected Vector groups = new Vector(); + protected char gapCharacter = '-'; + protected int type = NUCLEOTIDE; + public static final int PROTEIN = 0; + public static final int NUCLEOTIDE = 1; + + /** DOCUMENT ME!! */ + public AlignmentAnnotation[] annotations; + + HiddenSequences hiddenSequences = new HiddenSequences(this); + + private void initAlignment(SequenceI[] seqs) { + int i=0; + + if( jalview.util.Comparison.isNucleotide(seqs)) + type = NUCLEOTIDE; + else + type = PROTEIN; + + sequences = new Vector(); + + for (i = 0; i < seqs.length; i++) + { + sequences.addElement(seqs[i]); + } + + } + /** Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + initAlignment(seqs); + } + /** + * Make a new alignment from an array of SeqCigars + * @param seqs SeqCigar[] + */ + public Alignment(SeqCigar[] alseqs) { + SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null); + initAlignment(seqs); + } + /** + * Make a new alignment from an CigarArray + * JBPNote - can only do this when compactAlignment does not contain hidden regions. + * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. + * @param compactAlignment CigarArray + */ + public static AlignmentI createAlignment(CigarArray compactAlignment) { + throw new Error("Alignment(CigarArray) not yet implemented"); + // this(compactAlignment.refCigars); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getSequences() + { + return sequences; + } + + public SequenceI [] getSequencesArray() + { + SequenceI [] reply = new SequenceI[sequences.size()]; + for(int i=0; i-1 && i maxLength) + { + maxLength = getSequenceAt(i).getLength(); + } + } + + return maxLength; + } + + + /** + * DOCUMENT ME! + * + * @param gc DOCUMENT ME! + */ + public void setGapCharacter(char gc) + { + gapCharacter = gc; + + for (int i = 0; i < sequences.size(); i++) + { + Sequence seq = (Sequence) sequences.elementAt(i); + seq.setSequence( seq.getSequence().replace('.', gc) ); + seq.setSequence( seq.getSequence().replace('-', gc) ); + seq.setSequence( seq.getSequence().replace(' ', gc) ); + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public char getGapCharacter() + { + return gapCharacter; + } + + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean isAligned() + { + int width = getWidth(); + + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() != width) + { + return false; + } + } + + return true; + } + + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void deleteAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + + if (annotations != null) + { + aSize = annotations.length; + } + + if(aSize<1) + return; + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; + + int tIndex = 0; + + for (int i = 0; i < aSize; i++) + { + if (annotations[i] == aa) + { + continue; + } + + temp[tIndex] = annotations[i]; + tIndex++; + } + + annotations = temp; + } + + + public void adjustSequenceAnnotations() + { + if(annotations!=null) + { + for (int a = 0; a < annotations.length; a++) + { + if (annotations[a].sequenceRef != null) + { + annotations[a].adjustForAlignment(); + } + } + } + } + + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void addAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + if (annotations != null) + { + aSize = annotations.length + 1; + } + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; + + temp[aSize-1] = aa; + + int i = 0; + + if (aSize > 1) + { + for (i = 0; i < (aSize-1); i++) + { + temp[i] = annotations[i]; + } + } + + annotations = temp; + } + + public void setAnnotationIndex(AlignmentAnnotation aa, int index) + { + if(aa==null || annotations==null || annotations.length-1 maxLength; j--) + { + if (j > maxLength && !jalview.util.Comparison.isGap( + current.getCharAt(j))) + { + maxLength = j; + break; + } + } + } + + maxLength++; + + int cLength; + for (int i = 0; i < sequences.size(); + i++) + { + current = getSequenceAt(i); + cLength = current.getLength(); + + if (cLength < maxLength) + { + current.insertCharAt(cLength, + maxLength-cLength, gapCharacter); + modified=true; + } + else if(current.getLength() > maxLength) + { + current.deleteChars(maxLength, current.getLength()); + } + } + return modified; + } + + public HiddenSequences getHiddenSequences() + { + return hiddenSequences; + } + SequenceI [] getVisibleAndRepresentedSeqs() + { + if(hiddenSequences==null || hiddenSequences.getSize()<1) + return getSequencesArray(); + + Vector seqs = new Vector(); + SequenceI seq; + SequenceGroup hidden; + for (int i = 0; i < sequences.size(); i++) + { + seq = (SequenceI) sequences.elementAt(i); + seqs.addElement(seq); + hidden = seq.getHiddenSequences(); + if(hidden!=null) + { + for(int j=0; j