2 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.datamodel;
23 import jalview.analysis.*;
26 * Data structure to hold and manipulate a multiple sequence alignment
32 public class Alignment implements AlignmentI
// Alignment holding the dataset sequences for this alignment; assigned in
// setDataset() and returned by getDataset().
34 protected Alignment dataset;
// Rows of the alignment; elements are read back as SequenceI throughout
// this class.
36 protected Vector sequences;
// Groups defined on this alignment; elements are read back as SequenceGroup.
38 protected Vector groups = new Vector();
// Character used for gaps; defaults to '-'.
40 protected char gapCharacter = '-';
// Either NUCLEOTIDE or PROTEIN; initAlignment() sets it from
// jalview.util.Comparison.isNucleotide().
42 protected int type = NUCLEOTIDE;
// Value of 'type' for protein alignments.
44 public static final int PROTEIN = 0;
// Value of 'type' for nucleotide alignments.
46 public static final int NUCLEOTIDE = 1;
// Annotation rows attached to this alignment; may be null when none exist.
49 public AlignmentAnnotation[] annotations;
// Bookkeeping for hidden sequences; set to null by finalize().
51 HiddenSequences hiddenSequences = new HiddenSequences(this);
// Arbitrary key/value properties; created lazily by setProperty().
53 public Hashtable alignmentProperties;
55 private void initAlignment(SequenceI[] seqs)
59 if (jalview.util.Comparison.isNucleotide(seqs))
68 sequences = new Vector();
70 for (i = 0; i < seqs.length; i++)
72 sequences.addElement(seqs[i]);
/**
 * Make an alignment from an array of Sequences.
 *
 * @param seqs
 *          the sequences that become the rows of the new alignment
 */
public Alignment(SequenceI[] seqs)
{
  this.initAlignment(seqs);
}
/**
 * Make a new alignment from an array of SeqCigars. The cigars are expanded to
 * sequences using this alignment's gap character and a new, empty column
 * selection.
 *
 * @param alseqs
 *          the SeqCigar for each row of the new alignment
 */
public Alignment(SeqCigar[] alseqs)
{
  initAlignment(SeqCigar.createAlignmentSequences(alseqs, gapCharacter,
          new ColumnSelection(), null));
}
101 * Make a new alignment from an CigarArray JBPNote - can only do this when
102 * compactAlignment does not contain hidden regions. JBPNote - must also check
103 * that compactAlignment resolves to a set of SeqCigars - or construct them
106 * @param compactAlignment
109 public static AlignmentI createAlignment(CigarArray compactAlignment)
111 throw new Error("Alignment(CigarArray) not yet implemented");
112 // this(compactAlignment.refCigars);
118 * @return DOCUMENT ME!
120 public Vector getSequences()
125 public SequenceI[] getSequencesArray()
127 if (sequences == null)
129 SequenceI[] reply = new SequenceI[sequences.size()];
130 for (int i = 0; i < sequences.size(); i++)
132 reply[i] = (SequenceI) sequences.elementAt(i);
143 * @return DOCUMENT ME!
145 public SequenceI getSequenceAt(int i)
147 if (i < sequences.size())
149 return (SequenceI) sequences.elementAt(i);
156 * Adds a sequence to the alignment. Recalculates maxLength and size.
160 public void addSequence(SequenceI snew)
164 // maintain dataset integrity
165 if (snew.getDatasetSequence() != null)
167 getDataset().addSequence(snew.getDatasetSequence());
171 // derive new sequence
172 SequenceI adding = snew.deriveSequence();
173 getDataset().addSequence(adding.getDatasetSequence());
177 if (sequences == null)
179 initAlignment(new SequenceI[]
184 sequences.addElement(snew);
186 if (hiddenSequences != null)
187 hiddenSequences.adjustHeightSequenceAdded();
191 * Adds a sequence to the alignment. Recalculates maxLength and size.
195 public void setSequenceAt(int i, SequenceI snew)
197 SequenceI oldseq = getSequenceAt(i);
198 deleteSequence(oldseq);
200 sequences.setElementAt(snew, i);
206 * @return DOCUMENT ME!
208 public Vector getGroups()
213 public void finalize()
215 if (getDataset() != null)
216 getDataset().removeAlignmentRef();
222 hiddenSequences = null;
226 * decrement the alignmentRefs counter by one and call finalize if it goes to
229 private void removeAlignmentRef()
231 if (--alignmentRefs == 0)
243 public void deleteSequence(SequenceI s)
245 deleteSequence(findIndex(s));
254 public void deleteSequence(int i)
256 if (i > -1 && i < getHeight())
258 sequences.removeElementAt(i);
259 hiddenSequences.adjustHeightSequenceDeleted(i);
264 public SequenceGroup findGroup(SequenceI s)
266 for (int i = 0; i < this.groups.size(); i++)
268 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
270 if (sg.getSequences(null).contains(s))
285 * @return DOCUMENT ME!
287 public SequenceGroup[] findAllGroups(SequenceI s)
289 Vector temp = new Vector();
291 int gSize = groups.size();
292 for (int i = 0; i < gSize; i++)
294 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
295 if (sg == null || sg.getSequences(null) == null)
297 this.deleteGroup(sg);
302 if (sg.getSequences(null).contains(s))
308 SequenceGroup[] ret = new SequenceGroup[temp.size()];
310 for (int i = 0; i < temp.size(); i++)
312 ret[i] = (SequenceGroup) temp.elementAt(i);
319 public void addGroup(SequenceGroup sg)
321 if (!groups.contains(sg))
323 if (hiddenSequences.getSize() > 0)
325 int i, iSize = sg.getSize();
326 for (i = 0; i < iSize; i++)
328 if (!sequences.contains(sg.getSequenceAt(i)))
330 sg.deleteSequence(sg.getSequenceAt(i), false);
336 if (sg.getSize() < 1)
342 groups.addElement(sg);
347 * remove any annotation that references gp
348 * @param gp (if null, removes all group associated annotation)
350 private void removeAnnotationForGroup(SequenceGroup gp)
352 if (annotations==null || annotations.length==0)
356 // remove annotation very quickly
357 AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length];
361 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
363 if (annotations[i].groupRef != null)
365 todelete[p++] = annotations[i];
369 tokeep[k++] = annotations[i];
375 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
377 if (annotations[i].groupRef == gp)
379 todelete[p++] = annotations[i];
383 tokeep[k++] = annotations[i];
389 // clear out the group associated annotation.
390 for (i = 0; i < p; i++)
392 unhookAnnotation(todelete[i]);
395 t = new AlignmentAnnotation[k];
396 for (i = 0; i < k; i++)
404 public void deleteAllGroups()
406 if (annotations != null)
408 removeAnnotationForGroup(null);
410 groups.removeAllElements();
414 public void deleteGroup(SequenceGroup g)
416 if (groups.contains(g))
418 removeAnnotationForGroup(g);
419 groups.removeElement(g);
424 public SequenceI findName(String name)
426 return findName(name, false);
432 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
434 public SequenceI findName(String token, boolean b)
436 return findName(null, token, b);
442 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
445 public SequenceI findName(SequenceI startAfter, String token, boolean b)
450 String sqname = null;
451 if (startAfter != null)
453 // try to find the sequence in the alignment
454 boolean matched = false;
455 while (i < sequences.size())
457 if (getSequenceAt(i++) == startAfter)
468 while (i < sequences.size())
470 sq = getSequenceAt(i);
471 sqname = sq.getName();
472 if (sqname.equals(token) // exact match
473 || (b && // allow imperfect matches - case varies
474 (sqname.equalsIgnoreCase(token))))
476 return getSequenceAt(i);
485 public SequenceI[] findSequenceMatch(String name)
487 Vector matches = new Vector();
490 while (i < sequences.size())
492 if (getSequenceAt(i).getName().equals(name))
494 matches.addElement(getSequenceAt(i));
499 SequenceI[] result = new SequenceI[matches.size()];
500 for (i = 0; i < result.length; i++)
502 result[i] = (SequenceI) matches.elementAt(i);
512 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
514 public int findIndex(SequenceI s)
518 while (i < sequences.size())
520 if (s == getSequenceAt(i))
535 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
537 public int findIndex(SearchResults results)
541 while (i < sequences.size())
543 if (results.involvesSequence(getSequenceAt(i)))
555 * @return DOCUMENT ME!
557 public int getHeight()
559 return sequences.size();
565 * @return DOCUMENT ME!
567 public int getWidth()
571 for (int i = 0; i < sequences.size(); i++)
573 if (getSequenceAt(i).getLength() > maxLength)
575 maxLength = getSequenceAt(i).getLength();
588 public void setGapCharacter(char gc)
592 for (int i = 0; i < sequences.size(); i++)
594 Sequence seq = (Sequence) sequences.elementAt(i);
595 seq.setSequence(seq.getSequenceAsString().replace('.', gc).replace(
596 '-', gc).replace(' ', gc));
603 * @return DOCUMENT ME!
605 public char getGapCharacter()
611 * @see jalview.datamodel.AlignmentI#isAligned()
613 public boolean isAligned()
615 return isAligned(false);
618 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
620 public boolean isAligned(boolean includeHidden) {
621 int width = getWidth();
622 if (hiddenSequences==null || hiddenSequences.getSize()==0) {
623 includeHidden = true; // no hidden sequences to check against.
625 for (int i = 0; i < sequences.size(); i++)
627 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
629 if (getSequenceAt(i).getLength() != width)
642 * @seejalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
643 * AlignmentAnnotation)
645 public boolean deleteAnnotation(AlignmentAnnotation aa)
649 if (annotations != null)
651 aSize = annotations.length;
659 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
661 boolean swap = false;
664 for (int i = 0; i < aSize; i++)
666 if (annotations[i] == aa)
671 if (tIndex < temp.length)
672 temp[tIndex++] = annotations[i];
678 unhookAnnotation(aa);
684 * remove any object references associated with this annotation
688 private void unhookAnnotation(AlignmentAnnotation aa)
690 if (aa.sequenceRef != null)
692 aa.sequenceRef.removeAlignmentAnnotation(aa);
694 if (aa.groupRef != null)
696 // probably need to do more here in the future (post 2.5.0)
704 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
705 * AlignmentAnnotation)
707 public void addAnnotation(AlignmentAnnotation aa)
709 addAnnotation(aa, -1);
715 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
716 * AlignmentAnnotation, int)
718 public void addAnnotation(AlignmentAnnotation aa, int pos)
721 if (annotations != null)
723 aSize = annotations.length + 1;
726 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
728 if (pos == -1 || pos >= aSize)
730 temp[aSize - 1] = aa;
739 for (i = 0; i < (aSize - 1); i++, p++)
747 temp[p] = annotations[i];
755 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
757 if (aa == null || annotations == null || annotations.length - 1 < index)
762 int aSize = annotations.length;
763 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
767 for (int i = 0; i < aSize; i++)
776 temp[i] = annotations[i];
780 temp[i] = annotations[i - 1];
790 * @return DOCUMENT ME!
792 public AlignmentAnnotation[] getAlignmentAnnotation()
797 public void setNucleotide(boolean b)
809 public boolean isNucleotide()
811 if (type == NUCLEOTIDE)
821 public void setDataset(Alignment data)
823 if (dataset == null && data == null)
825 // Create a new dataset for this alignment.
826 // Can only be done once, if dataset is not null
827 // This will not be performed
828 SequenceI[] seqs = new SequenceI[getHeight()];
829 SequenceI currentSeq;
830 for (int i = 0; i < getHeight(); i++)
832 currentSeq = getSequenceAt(i);
833 if (currentSeq.getDatasetSequence() != null)
835 seqs[i] = (Sequence) currentSeq.getDatasetSequence();
839 seqs[i] = currentSeq.createDatasetSequence();
843 dataset = new Alignment(seqs);
845 else if (dataset == null && data != null)
849 dataset.addAlignmentRef();
853 * reference count for number of alignments referencing this one.
855 int alignmentRefs = 0;
858 * increase reference count to this alignment.
860 private void addAlignmentRef()
865 public Alignment getDataset()
870 public boolean padGaps()
872 boolean modified = false;
874 // Remove excess gaps from the end of alignment
878 for (int i = 0; i < sequences.size(); i++)
880 current = getSequenceAt(i);
881 for (int j = current.getLength(); j > maxLength; j--)
884 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
895 for (int i = 0; i < sequences.size(); i++)
897 current = getSequenceAt(i);
898 cLength = current.getLength();
900 if (cLength < maxLength)
902 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
905 else if (current.getLength() > maxLength)
907 current.deleteChars(maxLength, current.getLength());
914 * Justify the sequences to the left or right by deleting and inserting gaps
915 * before the initial residue or after the terminal residue
918 * true if alignment padded to right, false to justify to left
919 * @return true if alignment was changed
921 public boolean justify(boolean right)
923 boolean modified = false;
925 // Remove excess gaps from the end of alignment
927 int ends[] = new int[sequences.size() * 2];
929 for (int i = 0; i < sequences.size(); i++)
931 current = getSequenceAt(i);
932 // This should really be a sequence method
933 ends[i * 2] = current.findIndex(current.getStart());
934 ends[i * 2 + 1] = current.findIndex(current.getStart()
935 + current.getLength());
936 boolean hitres = false;
937 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
939 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
949 if (j - ends[i * 2] > maxLength)
951 maxLength = j - ends[i * 2];
959 // now edit the flanking gaps to justify to either left or right
960 int cLength, extent, diff;
961 for (int i = 0; i < sequences.size(); i++)
963 current = getSequenceAt(i);
965 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
966 diff = maxLength - cLength; // number of gaps to indent
967 extent = current.getLength();
971 if (extent > ends[i * 2 + 1])
973 current.deleteChars(ends[i * 2 + 1] + 1, extent);
976 if (ends[i * 2] > diff)
978 current.deleteChars(0, ends[i * 2] - diff);
983 if (ends[i * 2] < diff)
985 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
995 current.deleteChars(0, ends[i * 2]);
997 ends[i * 2 + 1] -= ends[i * 2];
998 extent -= ends[i * 2];
1000 if (extent > maxLength)
1002 current.deleteChars(maxLength + 1, extent);
1007 if (extent < maxLength)
1009 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1018 public HiddenSequences getHiddenSequences()
1020 return hiddenSequences;
1023 public CigarArray getCompactAlignment()
1025 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1026 for (int i = 0; i < sequences.size(); i++)
1028 alseqs[i] = new SeqCigar((SequenceI) sequences.elementAt(i));
1030 CigarArray cal = new CigarArray(alseqs);
1031 cal.addOperation(CigarArray.M, getWidth());
1035 public void setProperty(Object key, Object value)
1037 if (alignmentProperties == null)
1038 alignmentProperties = new Hashtable();
1040 alignmentProperties.put(key, value);
1043 public Object getProperty(Object key)
1045 if (alignmentProperties != null)
1046 return alignmentProperties.get(key);
1051 public Hashtable getProperties()
1053 return alignmentProperties;
1056 AlignedCodonFrame[] codonFrameList = null;
1062 * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame
1065 public void addCodonFrame(AlignedCodonFrame codons)
1069 if (codonFrameList == null)
1071 codonFrameList = new AlignedCodonFrame[]
1075 AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1];
1076 System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length);
1077 t[codonFrameList.length] = codons;
1084 * @see jalview.datamodel.AlignmentI#getCodonFrame(int)
1086 public AlignedCodonFrame getCodonFrame(int index)
1088 return codonFrameList[index];
1095 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1097 public AlignedCodonFrame[] getCodonFrame(SequenceI seq)
1099 if (seq == null || codonFrameList == null)
1101 Vector cframes = new Vector();
1102 for (int f = 0; f < codonFrameList.length; f++)
1104 if (codonFrameList[f].involvesSequence(seq))
1105 cframes.addElement(codonFrameList[f]);
1107 if (cframes.size() == 0)
1109 AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()];
1110 cframes.copyInto(cfr);
1117 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1119 public AlignedCodonFrame[] getCodonFrames()
1121 return codonFrameList;
1127 * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel.
1128 * AlignedCodonFrame)
1130 public boolean removeCodonFrame(AlignedCodonFrame codons)
1132 if (codons == null || codonFrameList == null)
1134 boolean removed = false;
1135 int i = 0, iSize = codonFrameList.length;
1138 if (codonFrameList[i] == codons)
1143 System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize
1156 public void append(AlignmentI toappend)
1158 // TODO test this method for a future 2.5 release
1159 // currently tested for use in jalview.gui.SequenceFetcher
1160 boolean samegap = toappend.getGapCharacter() == getGapCharacter();
1161 char oldc = toappend.getGapCharacter();
1162 boolean hashidden = toappend.getHiddenSequences() != null
1163 && toappend.getHiddenSequences().hiddenSequences != null;
1164 // get all sequences including any hidden ones
1165 Vector sqs = (hashidden) ? toappend.getHiddenSequences()
1166 .getFullAlignment().getSequences() : toappend.getSequences();
1169 Enumeration sq = sqs.elements();
1170 while (sq.hasMoreElements())
1172 SequenceI addedsq = (SequenceI) sq.nextElement();
1175 char[] oldseq = addedsq.getSequence();
1176 for (int c = 0; c < oldseq.length; c++)
1178 if (oldseq[c] == oldc)
1180 oldseq[c] = gapCharacter;
1184 addSequence(addedsq);
1187 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1188 for (int a = 0; alan != null && a < alan.length; a++)
1190 addAnnotation(alan[a]);
1192 AlignedCodonFrame[] acod = toappend.getCodonFrames();
1193 for (int a = 0; acod != null && a < acod.length; a++)
1195 this.addCodonFrame(acod[a]);
1197 Vector sg = toappend.getGroups();
1200 Enumeration el = sg.elements();
1201 while (el.hasMoreElements())
1203 addGroup((SequenceGroup) el.nextElement());
1206 if (toappend.getHiddenSequences() != null)
1208 HiddenSequences hs = toappend.getHiddenSequences();
1209 if (hiddenSequences == null)
1211 hiddenSequences = new HiddenSequences(this);
1213 if (hs.hiddenSequences != null)
1215 for (int s = 0; s < hs.hiddenSequences.length; s++)
1217 // hide the newly appended sequence in the alignment
1218 if (hs.hiddenSequences[s] != null)
1220 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1225 if (toappend.getProperties() != null)
1227 // we really can't do very much here - just try to concatenate strings
1228 // where property collisions occur.
1229 Enumeration key = toappend.getProperties().keys();
1230 while (key.hasMoreElements())
1232 Object k = key.nextElement();
1233 Object ourval = this.getProperty(k);
1234 Object toapprop = toappend.getProperty(k);
1237 if (ourval.getClass().equals(toapprop.getClass())
1238 && !ourval.equals(toapprop))
1240 if (ourval instanceof String)
1243 this.setProperty(k, ((String) ourval) + "; "
1244 + ((String) toapprop));
1248 if (ourval instanceof Vector)
1251 Enumeration theirv = ((Vector) toapprop).elements();
1252 while (theirv.hasMoreElements())
1254 ((Vector) ourval).addElement(theirv);
1262 // just add new property directly
1263 setProperty(k, toapprop);