2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)
3 * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
18 package jalview.datamodel;
22 import jalview.analysis.*;
25 * Data structure to hold and manipulate a multiple sequence alignment
31 public class Alignment implements AlignmentI
33 protected Alignment dataset;
35 protected Vector sequences;
37 protected Vector groups = new Vector();
39 protected char gapCharacter = '-';
41 protected int type = NUCLEOTIDE;
43 public static final int PROTEIN = 0;
45 public static final int NUCLEOTIDE = 1;
48 public AlignmentAnnotation[] annotations;
50 HiddenSequences hiddenSequences = new HiddenSequences(this);
52 public Hashtable alignmentProperties;
54 private void initAlignment(SequenceI[] seqs)
58 if (jalview.util.Comparison.isNucleotide(seqs))
67 sequences = new Vector();
69 for (i = 0; i < seqs.length; i++)
71 sequences.addElement(seqs[i]);
/**
 * Creates an alignment holding the given sequences, in array order.
 *
 * @param seqs
 *          the sequences to place in the alignment
 */
public Alignment(SequenceI[] seqs)
{
  initAlignment(seqs);
}
/**
 * Creates an alignment from an array of SeqCigars. The cigars are expanded
 * with SeqCigar.createAlignmentSequences using this alignment's gap character
 * and a fresh ColumnSelection, and the resulting sequences become the
 * alignment's rows.
 *
 * @param alseqs
 *          SeqCigar[] describing the sequences
 */
public Alignment(SeqCigar[] alseqs)
{
  initAlignment(SeqCigar.createAlignmentSequences(alseqs, gapCharacter,
          new ColumnSelection(), null));
}
100 * Make a new alignment from an CigarArray JBPNote - can only do this when
101 * compactAlignment does not contain hidden regions. JBPNote - must also check
102 * that compactAlignment resolves to a set of SeqCigars - or construct them
105 * @param compactAlignment
108 public static AlignmentI createAlignment(CigarArray compactAlignment)
110 throw new Error("Alignment(CigarArray) not yet implemented");
111 // this(compactAlignment.refCigars);
117 * @return DOCUMENT ME!
119 public Vector getSequences()
124 public SequenceI[] getSequencesArray()
126 if (sequences == null)
128 SequenceI[] reply = new SequenceI[sequences.size()];
129 for (int i = 0; i < sequences.size(); i++)
131 reply[i] = (SequenceI) sequences.elementAt(i);
142 * @return DOCUMENT ME!
144 public SequenceI getSequenceAt(int i)
146 if (i < sequences.size())
148 return (SequenceI) sequences.elementAt(i);
155 * Adds a sequence to the alignment. Recalculates maxLength and size.
159 public void addSequence(SequenceI snew)
163 // maintain dataset integrity
164 if (snew.getDatasetSequence() != null)
166 getDataset().addSequence(snew.getDatasetSequence());
170 // derive new sequence
171 SequenceI adding = snew.deriveSequence();
172 getDataset().addSequence(adding.getDatasetSequence());
176 if (sequences == null)
178 initAlignment(new SequenceI[]
183 sequences.addElement(snew);
185 if (hiddenSequences != null)
186 hiddenSequences.adjustHeightSequenceAdded();
190 * Adds a sequence to the alignment. Recalculates maxLength and size.
194 public void setSequenceAt(int i, SequenceI snew)
196 SequenceI oldseq = getSequenceAt(i);
197 deleteSequence(oldseq);
199 sequences.setElementAt(snew, i);
205 * @return DOCUMENT ME!
207 public Vector getGroups()
212 public void finalize()
214 if (getDataset() != null)
215 getDataset().removeAlignmentRef();
221 hiddenSequences = null;
225 * decrement the alignmentRefs counter by one and call finalize if it goes to
228 private void removeAlignmentRef()
230 if (--alignmentRefs == 0)
242 public void deleteSequence(SequenceI s)
244 deleteSequence(findIndex(s));
253 public void deleteSequence(int i)
255 if (i > -1 && i < getHeight())
257 sequences.removeElementAt(i);
258 hiddenSequences.adjustHeightSequenceDeleted(i);
263 public SequenceGroup findGroup(SequenceI s)
265 for (int i = 0; i < this.groups.size(); i++)
267 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
269 if (sg.getSequences(null).contains(s))
284 * @return DOCUMENT ME!
286 public SequenceGroup[] findAllGroups(SequenceI s)
288 Vector temp = new Vector();
290 int gSize = groups.size();
291 for (int i = 0; i < gSize; i++)
293 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
294 if (sg == null || sg.getSequences(null) == null)
296 this.deleteGroup(sg);
301 if (sg.getSequences(null).contains(s))
307 SequenceGroup[] ret = new SequenceGroup[temp.size()];
309 for (int i = 0; i < temp.size(); i++)
311 ret[i] = (SequenceGroup) temp.elementAt(i);
318 public void addGroup(SequenceGroup sg)
320 if (!groups.contains(sg))
322 if (hiddenSequences.getSize() > 0)
324 int i, iSize = sg.getSize();
325 for (i = 0; i < iSize; i++)
327 if (!sequences.contains(sg.getSequenceAt(i)))
329 sg.deleteSequence(sg.getSequenceAt(i), false);
335 if (sg.getSize() < 1)
341 groups.addElement(sg);
346 * remove any annotation that references gp
349 * (if null, removes all group associated annotation)
351 private void removeAnnotationForGroup(SequenceGroup gp)
353 if (annotations == null || annotations.length == 0)
357 // remove annotation very quickly
358 AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length];
362 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
364 if (annotations[i].groupRef != null)
366 todelete[p++] = annotations[i];
370 tokeep[k++] = annotations[i];
376 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
378 if (annotations[i].groupRef == gp)
380 todelete[p++] = annotations[i];
384 tokeep[k++] = annotations[i];
390 // clear out the group associated annotation.
391 for (i = 0; i < p; i++)
393 unhookAnnotation(todelete[i]);
396 t = new AlignmentAnnotation[k];
397 for (i = 0; i < k; i++)
405 public void deleteAllGroups()
407 if (annotations != null)
409 removeAnnotationForGroup(null);
411 groups.removeAllElements();
415 public void deleteGroup(SequenceGroup g)
417 if (groups.contains(g))
419 removeAnnotationForGroup(g);
420 groups.removeElement(g);
425 public SequenceI findName(String name)
427 return findName(name, false);
433 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
435 public SequenceI findName(String token, boolean b)
437 return findName(null, token, b);
443 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
446 public SequenceI findName(SequenceI startAfter, String token, boolean b)
451 String sqname = null;
452 if (startAfter != null)
454 // try to find the sequence in the alignment
455 boolean matched = false;
456 while (i < sequences.size())
458 if (getSequenceAt(i++) == startAfter)
469 while (i < sequences.size())
471 sq = getSequenceAt(i);
472 sqname = sq.getName();
473 if (sqname.equals(token) // exact match
474 || (b && // allow imperfect matches - case varies
475 (sqname.equalsIgnoreCase(token))))
477 return getSequenceAt(i);
486 public SequenceI[] findSequenceMatch(String name)
488 Vector matches = new Vector();
491 while (i < sequences.size())
493 if (getSequenceAt(i).getName().equals(name))
495 matches.addElement(getSequenceAt(i));
500 SequenceI[] result = new SequenceI[matches.size()];
501 for (i = 0; i < result.length; i++)
503 result[i] = (SequenceI) matches.elementAt(i);
513 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
515 public int findIndex(SequenceI s)
519 while (i < sequences.size())
521 if (s == getSequenceAt(i))
536 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
538 public int findIndex(SearchResults results)
542 while (i < sequences.size())
544 if (results.involvesSequence(getSequenceAt(i)))
556 * @return DOCUMENT ME!
558 public int getHeight()
560 return sequences.size();
566 * @return DOCUMENT ME!
568 public int getWidth()
572 for (int i = 0; i < sequences.size(); i++)
574 if (getSequenceAt(i).getLength() > maxLength)
576 maxLength = getSequenceAt(i).getLength();
589 public void setGapCharacter(char gc)
593 for (int i = 0; i < sequences.size(); i++)
595 Sequence seq = (Sequence) sequences.elementAt(i);
596 seq.setSequence(seq.getSequenceAsString().replace('.', gc).replace(
597 '-', gc).replace(' ', gc));
604 * @return DOCUMENT ME!
606 public char getGapCharacter()
614 * @see jalview.datamodel.AlignmentI#isAligned()
616 public boolean isAligned()
618 return isAligned(false);
624 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
626 public boolean isAligned(boolean includeHidden)
628 int width = getWidth();
629 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
631 includeHidden = true; // no hidden sequences to check against.
633 for (int i = 0; i < sequences.size(); i++)
635 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
637 if (getSequenceAt(i).getLength() != width)
650 * @seejalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
651 * AlignmentAnnotation)
653 public boolean deleteAnnotation(AlignmentAnnotation aa)
657 if (annotations != null)
659 aSize = annotations.length;
667 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
669 boolean swap = false;
672 for (int i = 0; i < aSize; i++)
674 if (annotations[i] == aa)
679 if (tIndex < temp.length)
680 temp[tIndex++] = annotations[i];
686 unhookAnnotation(aa);
692 * remove any object references associated with this annotation
696 private void unhookAnnotation(AlignmentAnnotation aa)
698 if (aa.sequenceRef != null)
700 aa.sequenceRef.removeAlignmentAnnotation(aa);
702 if (aa.groupRef != null)
704 // probably need to do more here in the future (post 2.5.0)
712 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
713 * AlignmentAnnotation)
715 public void addAnnotation(AlignmentAnnotation aa)
717 addAnnotation(aa, -1);
723 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
724 * AlignmentAnnotation, int)
726 public void addAnnotation(AlignmentAnnotation aa, int pos)
729 if (annotations != null)
731 aSize = annotations.length + 1;
734 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
736 if (pos == -1 || pos >= aSize)
738 temp[aSize - 1] = aa;
747 for (i = 0; i < (aSize - 1); i++, p++)
755 temp[p] = annotations[i];
763 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
765 if (aa == null || annotations == null || annotations.length - 1 < index)
770 int aSize = annotations.length;
771 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
775 for (int i = 0; i < aSize; i++)
784 temp[i] = annotations[i];
788 temp[i] = annotations[i - 1];
798 * @return DOCUMENT ME!
800 public AlignmentAnnotation[] getAlignmentAnnotation()
805 public void setNucleotide(boolean b)
817 public boolean isNucleotide()
819 if (type == NUCLEOTIDE)
829 public void setDataset(Alignment data)
831 if (dataset == null && data == null)
833 // Create a new dataset for this alignment.
834 // Can only be done once, if dataset is not null
835 // This will not be performed
836 SequenceI[] seqs = new SequenceI[getHeight()];
837 SequenceI currentSeq;
838 for (int i = 0; i < getHeight(); i++)
840 currentSeq = getSequenceAt(i);
841 if (currentSeq.getDatasetSequence() != null)
843 seqs[i] = (Sequence) currentSeq.getDatasetSequence();
847 seqs[i] = currentSeq.createDatasetSequence();
851 dataset = new Alignment(seqs);
853 else if (dataset == null && data != null)
857 dataset.addAlignmentRef();
861 * reference count for number of alignments referencing this one.
863 int alignmentRefs = 0;
866 * increase reference count to this alignment.
868 private void addAlignmentRef()
873 public Alignment getDataset()
878 public boolean padGaps()
880 boolean modified = false;
882 // Remove excess gaps from the end of alignment
886 for (int i = 0; i < sequences.size(); i++)
888 current = getSequenceAt(i);
889 for (int j = current.getLength(); j > maxLength; j--)
892 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
903 for (int i = 0; i < sequences.size(); i++)
905 current = getSequenceAt(i);
906 cLength = current.getLength();
908 if (cLength < maxLength)
910 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
913 else if (current.getLength() > maxLength)
915 current.deleteChars(maxLength, current.getLength());
922 * Justify the sequences to the left or right by deleting and inserting gaps
923 * before the initial residue or after the terminal residue
926 * true if alignment padded to right, false to justify to left
927 * @return true if alignment was changed
929 public boolean justify(boolean right)
931 boolean modified = false;
933 // Remove excess gaps from the end of alignment
935 int ends[] = new int[sequences.size() * 2];
937 for (int i = 0; i < sequences.size(); i++)
939 current = getSequenceAt(i);
940 // This should really be a sequence method
941 ends[i * 2] = current.findIndex(current.getStart());
942 ends[i * 2 + 1] = current.findIndex(current.getStart()
943 + current.getLength());
944 boolean hitres = false;
945 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
947 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
957 if (j - ends[i * 2] > maxLength)
959 maxLength = j - ends[i * 2];
967 // now edit the flanking gaps to justify to either left or right
968 int cLength, extent, diff;
969 for (int i = 0; i < sequences.size(); i++)
971 current = getSequenceAt(i);
973 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
974 diff = maxLength - cLength; // number of gaps to indent
975 extent = current.getLength();
979 if (extent > ends[i * 2 + 1])
981 current.deleteChars(ends[i * 2 + 1] + 1, extent);
984 if (ends[i * 2] > diff)
986 current.deleteChars(0, ends[i * 2] - diff);
991 if (ends[i * 2] < diff)
993 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1001 if (ends[i * 2] > 0)
1003 current.deleteChars(0, ends[i * 2]);
1005 ends[i * 2 + 1] -= ends[i * 2];
1006 extent -= ends[i * 2];
1008 if (extent > maxLength)
1010 current.deleteChars(maxLength + 1, extent);
1015 if (extent < maxLength)
1017 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1026 public HiddenSequences getHiddenSequences()
1028 return hiddenSequences;
1031 public CigarArray getCompactAlignment()
1033 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1034 for (int i = 0; i < sequences.size(); i++)
1036 alseqs[i] = new SeqCigar((SequenceI) sequences.elementAt(i));
1038 CigarArray cal = new CigarArray(alseqs);
1039 cal.addOperation(CigarArray.M, getWidth());
1043 public void setProperty(Object key, Object value)
1045 if (alignmentProperties == null)
1046 alignmentProperties = new Hashtable();
1048 alignmentProperties.put(key, value);
1051 public Object getProperty(Object key)
1053 if (alignmentProperties != null)
1054 return alignmentProperties.get(key);
1059 public Hashtable getProperties()
1061 return alignmentProperties;
1064 AlignedCodonFrame[] codonFrameList = null;
1070 * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame
1073 public void addCodonFrame(AlignedCodonFrame codons)
1077 if (codonFrameList == null)
1079 codonFrameList = new AlignedCodonFrame[]
1083 AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1];
1084 System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length);
1085 t[codonFrameList.length] = codons;
1092 * @see jalview.datamodel.AlignmentI#getCodonFrame(int)
1094 public AlignedCodonFrame getCodonFrame(int index)
1096 return codonFrameList[index];
1103 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1105 public AlignedCodonFrame[] getCodonFrame(SequenceI seq)
1107 if (seq == null || codonFrameList == null)
1109 Vector cframes = new Vector();
1110 for (int f = 0; f < codonFrameList.length; f++)
1112 if (codonFrameList[f].involvesSequence(seq))
1113 cframes.addElement(codonFrameList[f]);
1115 if (cframes.size() == 0)
1117 AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()];
1118 cframes.copyInto(cfr);
1125 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1127 public AlignedCodonFrame[] getCodonFrames()
1129 return codonFrameList;
1135 * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel.
1136 * AlignedCodonFrame)
1138 public boolean removeCodonFrame(AlignedCodonFrame codons)
1140 if (codons == null || codonFrameList == null)
1142 boolean removed = false;
1143 int i = 0, iSize = codonFrameList.length;
1146 if (codonFrameList[i] == codons)
1151 System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize
1164 public void append(AlignmentI toappend)
1166 // TODO test this method for a future 2.5 release
1167 // currently tested for use in jalview.gui.SequenceFetcher
1168 boolean samegap = toappend.getGapCharacter() == getGapCharacter();
1169 char oldc = toappend.getGapCharacter();
1170 boolean hashidden = toappend.getHiddenSequences() != null
1171 && toappend.getHiddenSequences().hiddenSequences != null;
1172 // get all sequences including any hidden ones
1173 Vector sqs = (hashidden) ? toappend.getHiddenSequences()
1174 .getFullAlignment().getSequences() : toappend.getSequences();
1177 Enumeration sq = sqs.elements();
1178 while (sq.hasMoreElements())
1180 SequenceI addedsq = (SequenceI) sq.nextElement();
1183 char[] oldseq = addedsq.getSequence();
1184 for (int c = 0; c < oldseq.length; c++)
1186 if (oldseq[c] == oldc)
1188 oldseq[c] = gapCharacter;
1192 addSequence(addedsq);
1195 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1196 for (int a = 0; alan != null && a < alan.length; a++)
1198 addAnnotation(alan[a]);
1200 AlignedCodonFrame[] acod = toappend.getCodonFrames();
1201 for (int a = 0; acod != null && a < acod.length; a++)
1203 this.addCodonFrame(acod[a]);
1205 Vector sg = toappend.getGroups();
1208 Enumeration el = sg.elements();
1209 while (el.hasMoreElements())
1211 addGroup((SequenceGroup) el.nextElement());
1214 if (toappend.getHiddenSequences() != null)
1216 HiddenSequences hs = toappend.getHiddenSequences();
1217 if (hiddenSequences == null)
1219 hiddenSequences = new HiddenSequences(this);
1221 if (hs.hiddenSequences != null)
1223 for (int s = 0; s < hs.hiddenSequences.length; s++)
1225 // hide the newly appended sequence in the alignment
1226 if (hs.hiddenSequences[s] != null)
1228 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1233 if (toappend.getProperties() != null)
1235 // we really can't do very much here - just try to concatenate strings
1236 // where property collisions occur.
1237 Enumeration key = toappend.getProperties().keys();
1238 while (key.hasMoreElements())
1240 Object k = key.nextElement();
1241 Object ourval = this.getProperty(k);
1242 Object toapprop = toappend.getProperty(k);
1245 if (ourval.getClass().equals(toapprop.getClass())
1246 && !ourval.equals(toapprop))
1248 if (ourval instanceof String)
1251 this.setProperty(k, ((String) ourval) + "; "
1252 + ((String) toapprop));
1256 if (ourval instanceof Vector)
1259 Enumeration theirv = ((Vector) toapprop).elements();
1260 while (theirv.hasMoreElements())
1262 ((Vector) ourval).addElement(theirv);
1270 // just add new property directly
1271 setProperty(k, toapprop);