X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=a770264d648a04ca733eb2159ae138bc04122bea;hb=d69ea8f1997771890b44e4b332a7ca84fe6f0893;hp=f78f9587028d2b37f4e284af9956b11502ccf6af;hpb=3ac9a19998fe3b2f4e4583bed327a13ca80c874b;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index f78f958..a770264 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,503 +1,841 @@ -package jalview.datamodel; - -import jalview.analysis.*; -import jalview.util.*; -import java.util.*; - -/** Data structure to hold and manipulate a multiple sequence alignment - */ -public class Alignment implements AlignmentI -{ - - protected Vector sequences; - protected Vector groups = new Vector(); - protected ArrayList superGroup = new ArrayList(); - protected char gapCharacter = '-'; - - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) { - sequences = new Vector(); - - for (int i=0; i < seqs.length; i++) - sequences.addElement(seqs[i]); - - getWidth(); - } - - public Vector getSequences() { - return sequences; - } - - public SequenceI getSequenceAt(int i) { - if (i < sequences.size()) { - return (SequenceI)sequences.elementAt(i); - } - - return null; - } - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * Should put the new sequence in a sequence group!!! - * - * @param snew - */ - public void addSequence(SequenceI snew) { - sequences.addElement(snew); - - ((SequenceGroup)groups.lastElement()).addSequence(snew); - } - - public void addSequence(SequenceI[] seq) { - for (int i=0; i < seq.length; i++) { - addSequence(seq[i]); - } - } - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * Should put the new sequence in a sequence group!!! - * - * @param snew - */ - public void setSequenceAt(int i,SequenceI snew) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(oldseq); - - sequences.setElementAt(snew,i); - - ((SequenceGroup)groups.lastElement()).addSequence(snew); - } - - public Vector getGroups() { - return groups; - } - - /** Sorts the sequences by sequence group size - largest to smallest. - * Uses QuickSort. - */ - public void sortGroups() { - float[] arr = new float [groups.size()]; - Object[] s = new Object[groups.size()]; - - for (int i=0; i < groups.size(); i++) { - arr[i] = ((SequenceGroup)groups.elementAt(i)).sequences.size(); - s[i] = groups.elementAt(i); - } - - QuickSort.sort(arr,s); - - Vector newg = new Vector(groups.size()); - - for (int i=groups.size()-1; i >= 0; i--) { - newg.addElement(s[i]); - } - - groups = newg; - } - - /** Takes out columns consisting entirely of gaps (-,.," ") - */ - public void removeGaps() - { - - SequenceI current; - int iSize = getWidth(); - for (int i=0; i < iSize; i++) - { - boolean delete = true; - for (int j=0; j < getHeight(); j++) - { - current = getSequenceAt(j); - if (current.getLength() > i) - { - /* MC Should move this to a method somewhere */ - if ( !jalview.util.Comparison.isGap(current.getCharAt(i))) - delete = false; - - } - } - - if ( delete ) - { - deleteColumns(i,i); - iSize--; - i--; - } - } - - - } - - /** Returns an array of Sequences containing columns - * start to end (inclusive) only. - * - * @param start start column to fetch - * @param end end column to fetch - * @return Array of Sequences, ready to put into a new Alignment - */ - public SequenceI[] getColumns(int start, int end) { - return getColumns(0,getHeight()-1,start,end); - } - - /** Removes a range of columns (start to end inclusive). - * - * @param start Start column in the alignment - * @param end End column in the alignment - */ - public void deleteColumns(int start, int end) { - deleteColumns(0,getHeight()-1,start,end); - } - - public void deleteColumns(int seq1, int seq2, int start, int end) { - - for (int i=0; i <= (end-start); i++) { - for (int j=seq1; j <= seq2; j++) { - getSequenceAt(j).deleteCharAt(start); - } - } - } - - public void insertColumns(SequenceI[] seqs, int pos) { - if (seqs.length == getHeight()) { - for (int i=0; i < getHeight();i++) { - String tmp = new String(getSequenceAt(i).getSequence()); - getSequenceAt(i).setSequence(tmp.substring(0,pos) + seqs[i].getSequence() + tmp.substring(pos)); - } - - } - } - - public SequenceI[] getColumns(int seq1, int seq2, int start, int end) { - SequenceI[] seqs = new Sequence[(seq2-seq1)+1]; - for (int i=seq1; i<= seq2; i++ ) { - seqs[i] = new Sequence(getSequenceAt(i).getName(), - getSequenceAt(i).getSequence().substring(start,end), - getSequenceAt(i).findPosition(start), - getSequenceAt(i).findPosition(end)); - } - return seqs; - } - - public void trimLeft(int i) { - for (int j = 0;j< getHeight();j++) { - - SequenceI s = getSequenceAt(j); - int newstart = s.findPosition(i); - - s.setStart(newstart); - s.setSequence(s.getSequence().substring(i)); - - } - } - - public void trimRight(int i) { - for (int j = 0;j< getHeight();j++) { - SequenceI s = getSequenceAt(j); - int newend = s.findPosition(i); - - s.setEnd(newend); - s.setSequence(s.getSequence().substring(0,i+1)); - } - } - - public void deleteSequence(SequenceI s) - { - for (int i=0; i < getHeight(); i++) - if (getSequenceAt(i) == s) - deleteSequence(i); - } - - public void deleteSequence(int i) - { - sequences.removeElementAt(i); - } - - - public Vector removeRedundancy(float threshold, Vector sel) { - Vector del = new Vector(); - - for (int i = 1; i < sel.size(); i++) - { - for (int j = 0; j < i; j++) - { - // Only do the comparison if either have not been deleted - if (!del.contains( (SequenceI) sel.elementAt(i)) || - !del.contains( (SequenceI) sel.elementAt(j))) - { - - float pid = Comparison.compare( (SequenceI) sel.elementAt(j), - (SequenceI) sel.elementAt(i)); - - if (pid >= threshold) - { - // Delete the shortest one - if ( ( (SequenceI) sel.elementAt(j)).getSequence().length() > - ( (SequenceI) sel.elementAt(i)).getSequence().length()) - del.addElement(sel.elementAt(i)); - else - del.addElement(sel.elementAt(i)); - } - } - } - } - - // Now delete the sequences - for (int i=0; i < del.size(); i++) - deleteSequence((SequenceI)del.elementAt(i)); - - return del; - } - - public void sortByPID(SequenceI s) { - - float scores[] = new float[getHeight()]; - SequenceI seqs[] = new SequenceI[getHeight()]; - - for (int i = 0; i < getHeight(); i++) { - scores[i] = Comparison.compare(getSequenceAt(i),s); - seqs[i] = getSequenceAt(i); - } - - QuickSort.sort(scores,0,scores.length-1,seqs); - - int len = 0; - - if (getHeight()%2 == 0) { - len = getHeight()/2; - } else { - len = (getHeight()+1)/2; - } - - for (int i = 0; i < len; i++) { - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight()-i-1],i); - sequences.setElementAt(tmp,getHeight()-i-1); - } - } - - public void sortByID() { - String ids[] = new String[getHeight()]; - SequenceI seqs[] = new SequenceI[getHeight()]; - - for (int i = 0; i < getHeight(); i++) { - ids[i] = getSequenceAt(i).getName(); - seqs[i] = getSequenceAt(i); - } - - QuickSort.sort(ids,seqs); - - int len = 0; - - if (getHeight()%2 == 0) { - len = getHeight()/2; - } else { - len = (getHeight()+1)/2; - System.out.println("Sort len is odd = " + len); - } - for (int i = 0; i < len; i++) { - System.out.println("Swapping " + seqs[i].getName() + " and " + seqs[getHeight()-i-1].getName()); - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight()-i-1],i); - sequences.setElementAt(tmp,getHeight()-i-1); - } - } - - /** */ - public SequenceGroup findGroup(int i) { - return findGroup(getSequenceAt(i)); - } - - /** */ - public SequenceGroup findGroup(SequenceI s) { - for (int i = 0; i < this.groups.size();i++) - { - SequenceGroup sg = (SequenceGroup)groups.elementAt(i); - if (sg.sequences.contains(s)) - return sg; - - } - return null; - } - - public SequenceGroup [] findAllGroups(SequenceI s) - { - - ArrayList temp = new ArrayList(); - - for (int i = 0; i < this.groups.size();i++) - { - SequenceGroup sg = (SequenceGroup)groups.elementAt(i); - - if (sg.sequences.contains(s)) - temp.add(sg); - } - - SequenceGroup [] ret = new SequenceGroup[temp.size()]; - temp.toArray( ret ); - - return ret; - - } - /** */ - public void addToGroup(SequenceGroup g, SequenceI s) { - if (!(g.sequences.contains(s))) { - g.sequences.addElement(s); - } - } - /** */ - public void removeFromGroup(SequenceGroup g,SequenceI s) { - if (g != null && g.sequences != null) { - if (g.sequences.contains(s)) { - g.sequences.removeElement(s); - if (g.sequences.size() == 0) { - groups.removeElement(g); - } - } - } - } - - public void addSuperGroup(SuperGroup sg) - { - superGroup.add(sg); - } - - public void removeSuperGroup(SuperGroup sg) - { - superGroup.remove(sg); - } - - public SuperGroup getSuperGroup(SequenceGroup sg) - { - for (int i = 0; i < this.superGroup.size(); i++) - { - SuperGroup temp = (SuperGroup) superGroup.get(i); - if (temp.sequenceGroups.contains(sg)) - return temp; - } - return null; - } - - /** */ - public void addGroup(SequenceGroup sg) { - if(!groups.contains(sg)) - groups.addElement(sg); - } - - public void deleteAllGroups() - { - groups.clear(); - superGroup.clear(); - } - - /** */ - public void deleteGroup(SequenceGroup g) { - if (groups.contains(g)) { - groups.removeElement(g); - } - } - - /** */ - public SequenceI findName(String name) { - int i = 0; - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - if (s.getName().equals(name)) - return s; - - i++; - } - return null; - } - - /** */ - public SequenceI findbyDisplayId(String name) { - int i = 0; - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - if (s.getDisplayId().equals(name)) - return s; - - i++; - } - return null; - } - - /** */ - public int findIndex(SequenceI s) - { - int i=0; - while (i < sequences.size()) - { - if (s == getSequenceAt(i)) - return i; - - i++; - } - return -1; - } - - public int getHeight() { - return sequences.size(); - } - - - public int getWidth() - { - int maxLength = -1; - for (int i = 0; i < sequences.size(); i++) - { - if (getSequenceAt(i).getLength() > maxLength) - maxLength = getSequenceAt(i).getLength(); - } - - return maxLength; - } - - - public int getMaxIdLength() { - int max = 0; - int i = 0; - - while (i < sequences.size()) { - SequenceI seq = getSequenceAt(i); - String tmp = seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd(); - - if (tmp.length() > max) { - max = tmp.length(); - } - - i++; - } - return max; - } - - public void setGapCharacter(char gc) - { - char old = getGapCharacter(); - gapCharacter = gc; - for (int i=0; i < sequences.size(); i++) - { - Sequence seq = (Sequence)sequences.elementAt(i); - seq.sequence = seq.sequence.replace(old, gc); - } - } - - public char getGapCharacter() { - return gapCharacter; - } - - public Vector getAAFrequency() - { - return AAFrequency.calculate(sequences, 0, getWidth()); - } -} - - - - - - - - +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ +package jalview.datamodel; + +import jalview.analysis.*; + +import jalview.util.*; + +import java.util.*; + +/** Data structure to hold and manipulate a multiple sequence alignment + */ +public class Alignment implements AlignmentI +{ + protected Alignment dataset; + protected Vector sequences; + protected Vector groups = new Vector(); + protected char gapCharacter = '-'; + protected int type = NUCLEOTIDE; + public static final int PROTEIN = 0; + public static final int NUCLEOTIDE = 1; + + /** DOCUMENT ME!! */ + public AlignmentAnnotation[] annotations; + + HiddenSequences hiddenSequences = new HiddenSequences(this); + + private void initAlignment(SequenceI[] seqs) { + int i=0; + + if( jalview.util.Comparison.isNucleotide(seqs)) + type = NUCLEOTIDE; + else + type = PROTEIN; + + sequences = new Vector(); + + for (i = 0; i < seqs.length; i++) + { + sequences.addElement(seqs[i]); + } + + } + /** Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + initAlignment(seqs); + } + /** + * Make a new alignment from an array of SeqCigars + * @param seqs SeqCigar[] + */ + public Alignment(SeqCigar[] alseqs) { + SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null); + initAlignment(seqs); + } + /** + * Make a new alignment from an CigarArray + * JBPNote - can only do this when compactAlignment does not contain hidden regions. + * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. + * @param compactAlignment CigarArray + */ + public static AlignmentI createAlignment(CigarArray compactAlignment) { + throw new Error("Alignment(CigarArray) not yet implemented"); + // this(compactAlignment.refCigars); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getSequences() + { + return sequences; + } + + public SequenceI [] getSequencesArray() + { + SequenceI [] reply = new SequenceI[sequences.size()]; + for(int i=0; i width) + { + width = seqs[i].getLength(); + } + } + + int startCol = -1, endCol = -1; + boolean delete = true; + for (int i = 0; i < width; i++) + { + delete = true; + + for (j = 0; j < jSize; j++) + { + if (seqs[j].getLength() > i) + { + if (!jalview.util.Comparison.isGap(seqs[j].getCharAt(i))) + { + if(delete) + endCol = i; + + delete = false; + break; + } + } + } + + if(delete && startCol==-1) + { + startCol = i; + } + + + if (!delete && startCol > -1) + { + deleteColumns(seqs, startCol, endCol); + if (shiftrecord!=null) { + shiftrecord.addShift(startCol, 1+endCol-startCol); + } + width -= (endCol - startCol); + i -= (endCol - startCol); + startCol = -1; + endCol = -1; + } + } + + if (delete && startCol > -1) + { + deleteColumns(seqs, startCol, endCol); + if (shiftrecord!=null) { + shiftrecord.addShift(startCol, 1+endCol-startCol); + } + } + } + + /** Removes a range of columns (start to end inclusive). + * + * @param seqs Sequences to remove columns from + * @param start Start column in the alignment + * @param end End column in the alignment + */ + public void deleteColumns(SequenceI [] seqs, int start, int end) + { + for(int i=0; iseqs[j].getLength()) + { + sequences.removeElement(seqs[j]); + j--; + jSize--; + } + else + { + seqs[j].setStart(newstart); + seqs[j].setSequence(seqs[j].getSequence().substring(i)); + } + } + } + + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + public void trimRight(int i) + { + SequenceI[] seqs = getVisibleAndRepresentedSeqs(); + int j, jSize = seqs.length; + for (j = 0; j < jSize; j++) + { + int newend = seqs[j].findPosition(i); + + seqs[j].setEnd(newend); + if(seqs[j].getLength()>i) + seqs[j].setSequence(seqs[j].getSequence().substring(0, i + 1)); + } + } + + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + */ + public void deleteSequence(SequenceI s) + { + for (int i = 0; i < getHeight(); i++) + { + if (getSequenceAt(i) == s) + { + deleteSequence(i); + } + } + } + + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + public void deleteSequence(int i) + { + sequences.removeElementAt(i); + } + + + /** */ + public SequenceGroup findGroup(SequenceI s) + { + for (int i = 0; i < this.groups.size(); i++) + { + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + + if (sg.getSequences(false).contains(s)) + { + return sg; + } + } + + return null; + } + + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceGroup[] findAllGroups(SequenceI s) + { + Vector temp = new Vector(); + + int gSize = groups.size(); + for (int i = 0; i < gSize; i++) + { + SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + if(sg==null || sg.getSequences(false)==null) + { + this.deleteGroup(sg); + gSize--; + continue; + } + + if (sg.getSequences(false).contains(s)) + { + temp.addElement(sg); + } + } + + SequenceGroup[] ret = new SequenceGroup[temp.size()]; + + for (int i = 0; i < temp.size(); i++) + { + ret[i] = (SequenceGroup) temp.elementAt(i); + } + + return ret; + } + + + + /** */ + public void addGroup(SequenceGroup sg) + { + if (!groups.contains(sg)) + { + groups.addElement(sg); + } + } + + /** + * DOCUMENT ME! + */ + public void deleteAllGroups() + { + groups.removeAllElements(); + + int i = 0; + + while (i < sequences.size()) + { + SequenceI s = getSequenceAt(i); + s.setColor(java.awt.Color.white); + i++; + } + } + + /** */ + public void deleteGroup(SequenceGroup g) + { + if (groups.contains(g)) + { + groups.removeElement(g); + } + } + + /** */ + public SequenceI findName(String name) + { + int i = 0; + + while (i < sequences.size()) + { + if (getSequenceAt(i).getName().equals(name)) + { + return getSequenceAt(i); + } + + i++; + } + + return null; + } + + public SequenceI [] findSequenceMatch(String name) + { + Vector matches = new Vector(); + int i = 0; + + while (i < sequences.size()) + { + if (getSequenceAt(i).getName().equals(name)) + { + matches.addElement(getSequenceAt(i)); + } + i++; + } + + SequenceI [] result = new SequenceI[matches.size()]; + for(i=0; i maxLength) + { + maxLength = getSequenceAt(i).getLength(); + } + } + + return maxLength; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getMaxIdLength() + { + int max = 0; + int i = 0; + + while (i < sequences.size()) + { + SequenceI seq = getSequenceAt(i); + String tmp = seq.getName() + "/" + seq.getStart() + "-" + + seq.getEnd(); + + if (tmp.length() > max) + { + max = tmp.length(); + } + + i++; + } + + return max; + } + + /** + * DOCUMENT ME! + * + * @param gc DOCUMENT ME! + */ + public void setGapCharacter(char gc) + { + gapCharacter = gc; + + for (int i = 0; i < sequences.size(); i++) + { + Sequence seq = (Sequence) sequences.elementAt(i); + seq.setSequence( seq.getSequence().replace('.', gc) ); + seq.setSequence( seq.getSequence().replace('-', gc) ); + seq.setSequence( seq.getSequence().replace(' ', gc) ); + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public char getGapCharacter() + { + return gapCharacter; + } + + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean isAligned() + { + int width = getWidth(); + + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() != width) + { + return false; + } + } + + return true; + } + + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void deleteAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + + if (annotations != null) + { + aSize = annotations.length; + } + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; + + int tIndex = 0; + + for (int i = 0; i < aSize; i++) + { + if (annotations[i] == aa) + { + continue; + } + + temp[tIndex] = annotations[i]; + tIndex++; + } + + annotations = temp; + } + + + public void adjustSequenceAnnotations() + { + if(annotations!=null) + { + for (int a = 0; a < annotations.length; a++) + { + if (annotations[a].sequenceRef != null) + { + annotations[a].adjustForAlignment(); + } + } + } + } + + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void addAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + if (annotations != null) + { + aSize = annotations.length + 1; + } + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; + + temp[aSize-1] = aa; + + int i = 0; + + if (aSize > 1) + { + for (i = 0; i < (aSize-1); i++) + { + temp[i] = annotations[i]; + } + } + + annotations = temp; + } + + public void setAnnotationIndex(AlignmentAnnotation aa, int index) + { + if(aa==null || annotations==null || annotations.length-1 maxLength; j--) + { + if (j > maxLength && !jalview.util.Comparison.isGap( + current.getCharAt(j))) + { + maxLength = j; + break; + } + } + } + + maxLength++; + + for (int i = 0; i < sequences.size(); + i++) + { + current = getSequenceAt(i); + + if (current.getLength() < maxLength) + { + current.insertCharAt(maxLength - 1, gapCharacter); + modified=true; + } + else if(current.getLength() > maxLength) + { + current.deleteChars(maxLength, current.getLength()); + } + } + return modified; + } + + public HiddenSequences getHiddenSequences() + { + return hiddenSequences; + } + SequenceI [] getVisibleAndRepresentedSeqs() + { + if(hiddenSequences==null || hiddenSequences.getSize()<1) + return getSequencesArray(); + + Vector seqs = new Vector(); + SequenceI seq; + SequenceGroup hidden; + for (int i = 0; i < sequences.size(); i++) + { + seq = (SequenceI) sequences.elementAt(i); + seqs.addElement(seq); + hidden = seq.getHiddenSequences(); + if(hidden!=null) + { + for(int j=0; j