/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.datamodel; import jalview.analysis.*; import jalview.util.*; import java.util.*; /** Data structure to hold and manipulate a multiple sequence alignment */ public class Alignment implements AlignmentI { protected Vector sequences; protected Vector groups = new Vector(); protected Vector superGroup = new Vector(); protected char gapCharacter = '-'; public AlignmentAnnotation[] annotations; public boolean featuresAdded = false; /** Make an alignment from an array of Sequences. * * @param sequences */ public Alignment(SequenceI[] seqs) { sequences = new Vector(); for (int i = 0; i < seqs.length; i++) sequences.addElement(seqs[i]); getWidth(); } public Vector getSequences() { return sequences; } public SequenceI getSequenceAt(int i) { if (i < sequences.size()) { return (SequenceI) sequences.elementAt(i); } return null; } /** Adds a sequence to the alignment. Recalculates maxLength and size. * * @param snew */ public void addSequence(SequenceI snew) { sequences.addElement(snew); } public void addSequence(SequenceI[] seq) { for (int i = 0; i < seq.length; i++) { addSequence(seq[i]); } } /** Adds a sequence to the alignment. Recalculates maxLength and size. * * @param snew */ public void setSequenceAt(int i, SequenceI snew) { SequenceI oldseq = getSequenceAt(i); deleteSequence(oldseq); sequences.setElementAt(snew, i); } public Vector getGroups() { return groups; } /** Sorts the sequences by sequence group size - largest to smallest. * Uses QuickSort. */ public void sortGroups() { float[] arr = new float[groups.size()]; Object[] s = new Object[groups.size()]; for (int i = 0; i < groups.size(); i++) { arr[i] = ((SequenceGroup) groups.elementAt(i)).sequences.size(); s[i] = groups.elementAt(i); } QuickSort.sort(arr, s); Vector newg = new Vector(groups.size()); for (int i = groups.size() - 1; i >= 0; i--) { newg.addElement(s[i]); } groups = newg; } /** Takes out columns consisting entirely of gaps (-,.," ") */ public void removeGaps() { SequenceI current; int iSize = getWidth(); for (int i = 0; i < iSize; i++) { boolean delete = true; for (int j = 0; j < getHeight(); j++) { current = getSequenceAt(j); if (current.getLength() > i) { /* MC Should move this to a method somewhere */ if (!jalview.util.Comparison.isGap(current.getCharAt(i))) { delete = false; } } } if (delete) { deleteColumns(i, i); iSize--; i--; } } } /** Returns an array of Sequences containing columns * start to end (inclusive) only. * * @param start start column to fetch * @param end end column to fetch * @return Array of Sequences, ready to put into a new Alignment */ public SequenceI[] getColumns(int start, int end) { return getColumns(0, getHeight() - 1, start, end); } /** Removes a range of columns (start to end inclusive). * * @param start Start column in the alignment * @param end End column in the alignment */ public void deleteColumns(int start, int end) { deleteColumns(0, getHeight() - 1, start, end); } public void deleteColumns(int seq1, int seq2, int start, int end) { for (int i = 0; i <= (end - start); i++) { for (int j = seq1; j <= seq2; j++) { getSequenceAt(j).deleteCharAt(start); } } } public void insertColumns(SequenceI[] seqs, int pos) { if (seqs.length == getHeight()) { for (int i = 0; i < getHeight(); i++) { String tmp = new String(getSequenceAt(i).getSequence()); getSequenceAt(i).setSequence(tmp.substring(0, pos) + seqs[i].getSequence() + tmp.substring(pos)); } } } public SequenceI[] getColumns(int seq1, int seq2, int start, int end) { SequenceI[] seqs = new Sequence[(seq2 - seq1) + 1]; for (int i = seq1; i <= seq2; i++) { seqs[i] = new Sequence(getSequenceAt(i).getName(), getSequenceAt(i).getSequence().substring(start, end), getSequenceAt(i).findPosition(start), getSequenceAt(i).findPosition(end)); } return seqs; } public void trimLeft(int i) { for (int j = 0; j < getHeight(); j++) { SequenceI s = getSequenceAt(j); int newstart = s.findPosition(i); s.setStart(newstart); s.setSequence(s.getSequence().substring(i)); } } public void trimRight(int i) { for (int j = 0; j < getHeight(); j++) { SequenceI s = getSequenceAt(j); int newend = s.findPosition(i); s.setEnd(newend); s.setSequence(s.getSequence().substring(0, i + 1)); } } public void deleteSequence(SequenceI s) { for (int i = 0; i < getHeight(); i++) if (getSequenceAt(i) == s) { deleteSequence(i); } } public void deleteSequence(int i) { sequences.removeElementAt(i); } public Vector removeRedundancy(float threshold, Vector sel) { Vector del = new Vector(); for (int i = 1; i < sel.size(); i++) { for (int j = 0; j < i; j++) { // Only do the comparison if either have not been deleted if (!del.contains((SequenceI) sel.elementAt(i)) || !del.contains((SequenceI) sel.elementAt(j))) { // use PID instead of Comparison (which is really not pleasant) float pid = Comparison.PID((SequenceI) sel.elementAt(j), (SequenceI) sel.elementAt(i)); if (pid >= threshold) { // Delete the shortest one if (((SequenceI) sel.elementAt(j)).getSequence().length() > ((SequenceI) sel .elementAt( i)).getSequence().length()) { del.addElement(sel.elementAt(i)); } else { del.addElement(sel.elementAt(i)); } } } } } // Now delete the sequences for (int i = 0; i < del.size(); i++) deleteSequence((SequenceI) del.elementAt(i)); return del; } public void sortByPID(SequenceI s) { float[] scores = new float[getHeight()]; SequenceI[] seqs = new SequenceI[getHeight()]; for (int i = 0; i < getHeight(); i++) { scores[i] = Comparison.compare(getSequenceAt(i), s); seqs[i] = getSequenceAt(i); } QuickSort.sort(scores, 0, scores.length - 1, seqs); int len = 0; if ((getHeight() % 2) == 0) { len = getHeight() / 2; } else { len = (getHeight() + 1) / 2; } for (int i = 0; i < len; i++) { SequenceI tmp = seqs[i]; sequences.setElementAt(seqs[getHeight() - i - 1], i); sequences.setElementAt(tmp, getHeight() - i - 1); } } public void sortByID() { String[] ids = new String[getHeight()]; SequenceI[] seqs = new SequenceI[getHeight()]; for (int i = 0; i < getHeight(); i++) { ids[i] = getSequenceAt(i).getName(); seqs[i] = getSequenceAt(i); } QuickSort.sort(ids, seqs); int len = 0; if ((getHeight() % 2) == 0) { len = getHeight() / 2; } else { len = (getHeight() + 1) / 2; System.out.println("DEBUG:Sort len is odd = " + len); // log. } for (int i = 0; i < len; i++) { System.out.println("DEBUG:Swapping " + seqs[i].getName() + " and " + seqs[getHeight() - i - 1].getName()); // log. SequenceI tmp = seqs[i]; sequences.setElementAt(seqs[getHeight() - i - 1], i); sequences.setElementAt(tmp, getHeight() - i - 1); } } /** */ public SequenceGroup findGroup(int i) { return findGroup(getSequenceAt(i)); } /** */ public SequenceGroup findGroup(SequenceI s) { for (int i = 0; i < this.groups.size(); i++) { SequenceGroup sg = (SequenceGroup) groups.elementAt(i); if (sg.sequences.contains(s)) { return sg; } } return null; } public SequenceGroup[] findAllGroups(SequenceI s) { Vector temp = new Vector(); for (int i = 0; i < this.groups.size(); i++) { SequenceGroup sg = (SequenceGroup) groups.elementAt(i); if (sg.sequences.contains(s)) { temp.addElement(sg); } } SequenceGroup[] ret = new SequenceGroup[temp.size()]; for (int i = 0; i < temp.size(); i++) ret[i] = (SequenceGroup) temp.elementAt(i); return ret; } /** */ public void addToGroup(SequenceGroup g, SequenceI s) { if (!(g.sequences.contains(s))) { g.sequences.addElement(s); } } /** */ public void removeFromGroup(SequenceGroup g, SequenceI s) { if ((g != null) && (g.sequences != null)) { if (g.sequences.contains(s)) { g.sequences.removeElement(s); if (g.sequences.size() == 0) { groups.removeElement(g); } } } } public void addSuperGroup(SuperGroup sg) { superGroup.addElement(sg); } public void removeSuperGroup(SuperGroup sg) { superGroup.removeElement(sg); } public SuperGroup getSuperGroup(SequenceGroup sg) { for (int i = 0; i < this.superGroup.size(); i++) { SuperGroup temp = (SuperGroup) superGroup.elementAt(i); if (temp.sequenceGroups.contains(sg)) { return temp; } } return null; } /** */ public void addGroup(SequenceGroup sg) { if (!groups.contains(sg)) { groups.addElement(sg); } } public void deleteAllGroups() { groups.removeAllElements(); superGroup.removeAllElements(); int i = 0; while (i < sequences.size()) { SequenceI s = getSequenceAt(i); s.setColor(java.awt.Color.white); i++; } } /** */ public void deleteGroup(SequenceGroup g) { if (groups.contains(g)) { groups.removeElement(g); } } /** */ public SequenceI findName(String name) { int i = 0; while (i < sequences.size()) { SequenceI s = getSequenceAt(i); if (s.getName().equals(name)) { return s; } i++; } return null; } /** */ public SequenceI findbyDisplayId(String name) { int i = 0; while (i < sequences.size()) { SequenceI s = getSequenceAt(i); if (s.getDisplayId().equals(name)) { return s; } i++; } return null; } /** */ public int findIndex(SequenceI s) { int i = 0; while (i < sequences.size()) { if (s == getSequenceAt(i)) { return i; } i++; } return -1; } public int getHeight() { return sequences.size(); } public int getWidth() { int maxLength = -1; for (int i = 0; i < sequences.size(); i++) { if (getSequenceAt(i).getLength() > maxLength) { maxLength = getSequenceAt(i).getLength(); } } return maxLength; } public int getMaxIdLength() { int max = 0; int i = 0; while (i < sequences.size()) { SequenceI seq = getSequenceAt(i); String tmp = seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd(); if (tmp.length() > max) { max = tmp.length(); } i++; } return max; } public void setGapCharacter(char gc) { gapCharacter = gc; for (int i = 0; i < sequences.size(); i++) { Sequence seq = (Sequence) sequences.elementAt(i); seq.sequence = seq.sequence.replace('.', gc); seq.sequence = seq.sequence.replace('-', gc); seq.sequence = seq.sequence.replace(' ', gc); } } public char getGapCharacter() { return gapCharacter; } public Vector getAAFrequency() { return AAFrequency.calculate(sequences, 0, getWidth()); } public boolean isAligned() { int width = getWidth(); for (int i = 0; i < sequences.size(); i++) if (getSequenceAt(i).getLength() != width) { return false; } return true; } public void deleteAnnotation(AlignmentAnnotation aa) { int aSize = 1; if (annotations != null) { aSize = annotations.length; } AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1]; int tIndex = 0; for (int i = 0; i < aSize; i++) { if (annotations[i] == aa) { continue; } temp[tIndex] = annotations[i]; tIndex++; } annotations = temp; } public void addAnnotation(AlignmentAnnotation aa) { int aSize = 1; if (annotations != null) { aSize = annotations.length + 1; } AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize]; int i = 0; if (aSize > 1) { for (i = 0; i < (aSize - 1); i++) temp[i] = annotations[i]; } temp[i] = aa; annotations = temp; } public AlignmentAnnotation[] getAlignmentAnnotation() { return annotations; } }