X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignment.java;h=71e76aff2f6835cc26c329b4a32c23c2cae4fa1c;hb=582d39cb05dfbb5f956f74d4a97a17d9f63b0786;hp=817472a322c2d1bafb8e4def94b8def09b9fb924;hpb=d80cb60cde197454e9b22e08f4861abfc61a25eb;p=jalview.git diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 817472a..71e76af 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1,578 +1,686 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -package jalview.datamodel; - -import jalview.analysis.*; - -import jalview.util.*; - -import java.util.*; - - -/** Data structure to hold and manipulate a multiple sequence alignment - */ -public class Alignment implements AlignmentI { - protected Vector sequences; - protected Vector groups = new Vector(); - protected Vector superGroup = new Vector(); - protected char gapCharacter = '-'; - public AlignmentAnnotation[] annotations; - public boolean featuresAdded = false; - - /** Make an alignment from an array of Sequences. - * - * @param sequences - */ - public Alignment(SequenceI[] seqs) { - sequences = new Vector(); - - for (int i = 0; i < seqs.length; i++) - sequences.addElement(seqs[i]); - - getWidth(); - } - - public Vector getSequences() { - return sequences; - } - - public SequenceI getSequenceAt(int i) { - if (i < sequences.size()) { - return (SequenceI) sequences.elementAt(i); - } - - return null; - } - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void addSequence(SequenceI snew) { - sequences.addElement(snew); - } - - public void addSequence(SequenceI[] seq) { - for (int i = 0; i < seq.length; i++) { - addSequence(seq[i]); - } - } - - /** Adds a sequence to the alignment. Recalculates maxLength and size. - * - * @param snew - */ - public void setSequenceAt(int i, SequenceI snew) { - SequenceI oldseq = getSequenceAt(i); - deleteSequence(oldseq); - - sequences.setElementAt(snew, i); - } - - public Vector getGroups() { - return groups; - } - - /** Sorts the sequences by sequence group size - largest to smallest. - * Uses QuickSort. - */ - public void sortGroups() { - float[] arr = new float[groups.size()]; - Object[] s = new Object[groups.size()]; - - for (int i = 0; i < groups.size(); i++) { - arr[i] = ((SequenceGroup) groups.elementAt(i)).sequences.size(); - s[i] = groups.elementAt(i); - } - - QuickSort.sort(arr, s); - - Vector newg = new Vector(groups.size()); - - for (int i = groups.size() - 1; i >= 0; i--) { - newg.addElement(s[i]); - } - - groups = newg; - } - - /** Takes out columns consisting entirely of gaps (-,.," ") - */ - public void removeGaps() { - SequenceI current; - int iSize = getWidth(); - - for (int i = 0; i < iSize; i++) { - boolean delete = true; - - for (int j = 0; j < getHeight(); j++) { - current = getSequenceAt(j); - - if (current.getLength() > i) { - /* MC Should move this to a method somewhere */ - if (!jalview.util.Comparison.isGap(current.getCharAt(i))) { - delete = false; - } - } - } - - if (delete) { - deleteColumns(i, i); - iSize--; - i--; - } - } - } - - /** Returns an array of Sequences containing columns - * start to end (inclusive) only. - * - * @param start start column to fetch - * @param end end column to fetch - * @return Array of Sequences, ready to put into a new Alignment - */ - public SequenceI[] getColumns(int start, int end) { - return getColumns(0, getHeight() - 1, start, end); - } - - /** Removes a range of columns (start to end inclusive). - * - * @param start Start column in the alignment - * @param end End column in the alignment - */ - public void deleteColumns(int start, int end) { - deleteColumns(0, getHeight() - 1, start, end); - } - - public void deleteColumns(int seq1, int seq2, int start, int end) { - for (int i = 0; i <= (end - start); i++) { - for (int j = seq1; j <= seq2; j++) { - getSequenceAt(j).deleteCharAt(start); - } - } - } - - public void insertColumns(SequenceI[] seqs, int pos) { - if (seqs.length == getHeight()) { - for (int i = 0; i < getHeight(); i++) { - String tmp = new String(getSequenceAt(i).getSequence()); - getSequenceAt(i).setSequence(tmp.substring(0, pos) + - seqs[i].getSequence() + tmp.substring(pos)); - } - } - } - - public SequenceI[] getColumns(int seq1, int seq2, int start, int end) { - SequenceI[] seqs = new Sequence[(seq2 - seq1) + 1]; - - for (int i = seq1; i <= seq2; i++) { - seqs[i] = new Sequence(getSequenceAt(i).getName(), - getSequenceAt(i).getSequence().substring(start, end), - getSequenceAt(i).findPosition(start), - getSequenceAt(i).findPosition(end)); - } - - return seqs; - } - - public void trimLeft(int i) { - for (int j = 0; j < getHeight(); j++) { - SequenceI s = getSequenceAt(j); - int newstart = s.findPosition(i); - - s.setStart(newstart); - s.setSequence(s.getSequence().substring(i)); - } - } - - public void trimRight(int i) { - for (int j = 0; j < getHeight(); j++) { - SequenceI s = getSequenceAt(j); - int newend = s.findPosition(i); - - s.setEnd(newend); - s.setSequence(s.getSequence().substring(0, i + 1)); - } - } - - public void deleteSequence(SequenceI s) { - for (int i = 0; i < getHeight(); i++) - if (getSequenceAt(i) == s) { - deleteSequence(i); - } - } - - public void deleteSequence(int i) { - sequences.removeElementAt(i); - } - - public Vector removeRedundancy(float threshold, Vector sel) { - Vector del = new Vector(); - - for (int i = 1; i < sel.size(); i++) { - for (int j = 0; j < i; j++) { - // Only do the comparison if either have not been deleted - if (!del.contains((SequenceI) sel.elementAt(i)) || - !del.contains((SequenceI) sel.elementAt(j))) { - // use PID instead of Comparison (which is really not pleasant) - float pid = Comparison.PID((SequenceI) sel.elementAt(j), - (SequenceI) sel.elementAt(i)); - - if (pid >= threshold) { - // Delete the shortest one - if (((SequenceI) sel.elementAt(j)).getSequence().length() > ((SequenceI) sel - .elementAt( - i)).getSequence().length()) { - del.addElement(sel.elementAt(i)); - } else { - del.addElement(sel.elementAt(i)); - } - } - } - } - } - - // Now delete the sequences - for (int i = 0; i < del.size(); i++) - deleteSequence((SequenceI) del.elementAt(i)); - - return del; - } - - public void sortByPID(SequenceI s) { - float[] scores = new float[getHeight()]; - SequenceI[] seqs = new SequenceI[getHeight()]; - - for (int i = 0; i < getHeight(); i++) { - scores[i] = Comparison.compare(getSequenceAt(i), s); - seqs[i] = getSequenceAt(i); - } - - QuickSort.sort(scores, 0, scores.length - 1, seqs); - - int len = 0; - - if ((getHeight() % 2) == 0) { - len = getHeight() / 2; - } else { - len = (getHeight() + 1) / 2; - } - - for (int i = 0; i < len; i++) { - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight() - i - 1], i); - sequences.setElementAt(tmp, getHeight() - i - 1); - } - } - - public void sortByID() { - String[] ids = new String[getHeight()]; - SequenceI[] seqs = new SequenceI[getHeight()]; - - for (int i = 0; i < getHeight(); i++) { - ids[i] = getSequenceAt(i).getName(); - seqs[i] = getSequenceAt(i); - } - - QuickSort.sort(ids, seqs); - - int len = 0; - - if ((getHeight() % 2) == 0) { - len = getHeight() / 2; - } else { - len = (getHeight() + 1) / 2; - System.out.println("DEBUG:Sort len is odd = " + len); // log. - } - - for (int i = 0; i < len; i++) { - System.out.println("DEBUG:Swapping " + seqs[i].getName() + " and " + - seqs[getHeight() - i - 1].getName()); // log. - - SequenceI tmp = seqs[i]; - sequences.setElementAt(seqs[getHeight() - i - 1], i); - sequences.setElementAt(tmp, getHeight() - i - 1); - } - } - - /** */ - public SequenceGroup findGroup(int i) { - return findGroup(getSequenceAt(i)); - } - - /** */ - public SequenceGroup findGroup(SequenceI s) { - for (int i = 0; i < this.groups.size(); i++) { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - - if (sg.sequences.contains(s)) { - return sg; - } - } - - return null; - } - - public SequenceGroup[] findAllGroups(SequenceI s) { - Vector temp = new Vector(); - - for (int i = 0; i < this.groups.size(); i++) { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); - - if (sg.sequences.contains(s)) { - temp.addElement(sg); - } - } - - SequenceGroup[] ret = new SequenceGroup[temp.size()]; - - for (int i = 0; i < temp.size(); i++) - ret[i] = (SequenceGroup) temp.elementAt(i); - - return ret; - } - - /** */ - public void addToGroup(SequenceGroup g, SequenceI s) { - if (!(g.sequences.contains(s))) { - g.sequences.addElement(s); - } - } - - /** */ - public void removeFromGroup(SequenceGroup g, SequenceI s) { - if ((g != null) && (g.sequences != null)) { - if (g.sequences.contains(s)) { - g.sequences.removeElement(s); - - if (g.sequences.size() == 0) { - groups.removeElement(g); - } - } - } - } - - public void addSuperGroup(SuperGroup sg) { - superGroup.addElement(sg); - } - - public void removeSuperGroup(SuperGroup sg) { - superGroup.removeElement(sg); - } - - public SuperGroup getSuperGroup(SequenceGroup sg) { - for (int i = 0; i < this.superGroup.size(); i++) { - SuperGroup temp = (SuperGroup) superGroup.elementAt(i); - - if (temp.sequenceGroups.contains(sg)) { - return temp; - } - } - - return null; - } - - /** */ - public void addGroup(SequenceGroup sg) { - if (!groups.contains(sg)) { - groups.addElement(sg); - } - } - - public void deleteAllGroups() { - groups.removeAllElements(); - superGroup.removeAllElements(); - - int i = 0; - - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - s.setColor(java.awt.Color.white); - i++; - } - } - - /** */ - public void deleteGroup(SequenceGroup g) { - if (groups.contains(g)) { - groups.removeElement(g); - } - } - - /** */ - public SequenceI findName(String name) { - int i = 0; - - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - - if (s.getName().equals(name)) { - return s; - } - - i++; - } - - return null; - } - - /** */ - public SequenceI findbyDisplayId(String name) { - int i = 0; - - while (i < sequences.size()) { - SequenceI s = getSequenceAt(i); - - if (s.getDisplayId().equals(name)) { - return s; - } - - i++; - } - - return null; - } - - /** */ - public int findIndex(SequenceI s) { - int i = 0; - - while (i < sequences.size()) { - if (s == getSequenceAt(i)) { - return i; - } - - i++; - } - - return -1; - } - - public int getHeight() { - return sequences.size(); - } - - public int getWidth() { - int maxLength = -1; - - for (int i = 0; i < sequences.size(); i++) { - if (getSequenceAt(i).getLength() > maxLength) { - maxLength = getSequenceAt(i).getLength(); - } - } - - return maxLength; - } - - public int getMaxIdLength() { - int max = 0; - int i = 0; - - while (i < sequences.size()) { - SequenceI seq = getSequenceAt(i); - String tmp = seq.getName() + "/" + seq.getStart() + "-" + - seq.getEnd(); - - if (tmp.length() > max) { - max = tmp.length(); - } - - i++; - } - - return max; - } - - public void setGapCharacter(char gc) { - gapCharacter = gc; - - for (int i = 0; i < sequences.size(); i++) { - Sequence seq = (Sequence) sequences.elementAt(i); - seq.sequence = seq.sequence.replace('.', gc); - seq.sequence = seq.sequence.replace('-', gc); - seq.sequence = seq.sequence.replace(' ', gc); - } - } - - public char getGapCharacter() { - return gapCharacter; - } - - public Vector getAAFrequency() { - return AAFrequency.calculate(sequences, 0, getWidth()); - } - - public boolean isAligned() { - int width = getWidth(); - - for (int i = 0; i < sequences.size(); i++) - if (getSequenceAt(i).getLength() != width) { - return false; - } - - return true; - } - - public void deleteAnnotation(AlignmentAnnotation aa) { - int aSize = 1; - - if (annotations != null) { - aSize = annotations.length; - } - - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; - - int tIndex = 0; - - for (int i = 0; i < aSize; i++) { - if (annotations[i] == aa) { - continue; - } - - temp[tIndex] = annotations[i]; - tIndex++; - } - - annotations = temp; - } - - public void addAnnotation(AlignmentAnnotation aa) { - int aSize = 1; - - if (annotations != null) { - aSize = annotations.length + 1; - } - - AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; - int i = 0; - - if (aSize > 1) { - for (i = 0; i < (aSize - 1); i++) - temp[i] = annotations[i]; - } - - temp[i] = aa; - - annotations = temp; - } - - public AlignmentAnnotation[] getAlignmentAnnotation() { - return annotations; - } -} +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ +package jalview.datamodel; + +import jalview.analysis.*; + +import jalview.util.*; + +import java.util.*; + +/** Data structure to hold and manipulate a multiple sequence alignment + */ +public class Alignment implements AlignmentI +{ + protected Alignment dataset; + protected Vector sequences; + protected Vector groups = new Vector(); + protected char gapCharacter = '-'; + protected int type = NUCLEOTIDE; + public static final int PROTEIN = 0; + public static final int NUCLEOTIDE = 1; + + /** DOCUMENT ME!! */ + public AlignmentAnnotation[] annotations; + + HiddenSequences hiddenSequences = new HiddenSequences(this); + + private void initAlignment(SequenceI[] seqs) { + int i=0; + + if( jalview.util.Comparison.isNucleotide(seqs)) + type = NUCLEOTIDE; + else + type = PROTEIN; + + sequences = new Vector(); + + for (i = 0; i < seqs.length; i++) + { + sequences.addElement(seqs[i]); + } + + } + /** Make an alignment from an array of Sequences. + * + * @param sequences + */ + public Alignment(SequenceI[] seqs) + { + initAlignment(seqs); + } + /** + * Make a new alignment from an array of SeqCigars + * @param seqs SeqCigar[] + */ + public Alignment(SeqCigar[] alseqs) { + SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs, gapCharacter, new ColumnSelection(), null); + initAlignment(seqs); + } + /** + * Make a new alignment from an CigarArray + * JBPNote - can only do this when compactAlignment does not contain hidden regions. + * JBPNote - must also check that compactAlignment resolves to a set of SeqCigars - or construct them appropriately. + * @param compactAlignment CigarArray + */ + public static AlignmentI createAlignment(CigarArray compactAlignment) { + throw new Error("Alignment(CigarArray) not yet implemented"); + // this(compactAlignment.refCigars); + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Vector getSequences() + { + return sequences; + } + + public SequenceI [] getSequencesArray() + { + SequenceI [] reply = new SequenceI[sequences.size()]; + for(int i=0; i-1 && i maxLength) + { + maxLength = getSequenceAt(i).getLength(); + } + } + + return maxLength; + } + + + /** + * DOCUMENT ME! + * + * @param gc DOCUMENT ME! + */ + public void setGapCharacter(char gc) + { + gapCharacter = gc; + + for (int i = 0; i < sequences.size(); i++) + { + Sequence seq = (Sequence) sequences.elementAt(i); + seq.setSequence( seq.getSequence().replace('.', gc) ); + seq.setSequence( seq.getSequence().replace('-', gc) ); + seq.setSequence( seq.getSequence().replace(' ', gc) ); + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public char getGapCharacter() + { + return gapCharacter; + } + + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean isAligned() + { + int width = getWidth(); + + for (int i = 0; i < sequences.size(); i++) + { + if (getSequenceAt(i).getLength() != width) + { + return false; + } + } + + return true; + } + + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void deleteAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + + if (annotations != null) + { + aSize = annotations.length; + } + + if(aSize<1) + return; + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; + + int tIndex = 0; + + for (int i = 0; i < aSize; i++) + { + if (annotations[i] == aa) + { + continue; + } + + temp[tIndex] = annotations[i]; + tIndex++; + } + + annotations = temp; + } + + + public void adjustSequenceAnnotations() + { + if(annotations!=null) + { + for (int a = 0; a < annotations.length; a++) + { + if (annotations[a].sequenceRef != null) + { + annotations[a].adjustForAlignment(); + } + } + } + } + + /** + * DOCUMENT ME! + * + * @param aa DOCUMENT ME! + */ + public void addAnnotation(AlignmentAnnotation aa) + { + int aSize = 1; + if (annotations != null) + { + aSize = annotations.length + 1; + } + + AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; + + temp[aSize-1] = aa; + + int i = 0; + + if (aSize > 1) + { + for (i = 0; i < (aSize-1); i++) + { + temp[i] = annotations[i]; + } + } + + annotations = temp; + } + + public void setAnnotationIndex(AlignmentAnnotation aa, int index) + { + if(aa==null || annotations==null || annotations.length-1 maxLength; j--) + { + if (j > maxLength && !jalview.util.Comparison.isGap( + current.getCharAt(j))) + { + maxLength = j; + break; + } + } + } + + maxLength++; + + int cLength; + for (int i = 0; i < sequences.size(); + i++) + { + current = getSequenceAt(i); + cLength = current.getLength(); + + if (cLength < maxLength) + { + current.insertCharAt(cLength, + maxLength-cLength, gapCharacter); + modified=true; + } + else if(current.getLength() > maxLength) + { + current.deleteChars(maxLength, current.getLength()); + } + } + return modified; + } + + public HiddenSequences getHiddenSequences() + { + return hiddenSequences; + } + SequenceI [] getVisibleAndRepresentedSeqs() + { + if(hiddenSequences==null || hiddenSequences.getSize()<1) + return getSequencesArray(); + + Vector seqs = new Vector(); + SequenceI seq; + SequenceGroup hidden; + for (int i = 0; i < sequences.size(); i++) + { + seq = (SequenceI) sequences.elementAt(i); + seqs.addElement(seq); + hidden = seq.getHiddenSequences(); + if(hidden!=null) + { + for(int j=0; j