2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
18 package jalview.datamodel;
22 import jalview.analysis.*;
25 * Data structure to hold and manipulate a multiple sequence alignment
31 public class Alignment implements AlignmentI
// Instance state of the alignment.
// the alignment holding dataset sequences; read through getDataset(), assigned in setDataset()
33 protected Alignment dataset;
// the rows of the alignment; left unassigned here and created by initAlignment()
35 protected Vector sequences;
// SequenceGroup objects registered through addGroup()
37 protected Vector groups = new Vector();
// gap symbol used when padding or justifying; '-' unless changed by setGapCharacter()
39 protected char gapCharacter = '-';
// holds PROTEIN or NUCLEOTIDE; NUCLEOTIDE until changed
41 protected int type = NUCLEOTIDE;
43 public static final int PROTEIN = 0;
45 public static final int NUCLEOTIDE = 1;
// switched on by addAnnotation() when an annotation reports an RNA structure
47 public boolean hasRNAStructure = false;
// alignment annotation rows; null until the first annotation is added
50 public AlignmentAnnotation[] annotations;
// bookkeeping for hidden sequences; set to null by finalize()
52 HiddenSequences hiddenSequences = new HiddenSequences(this);
// lazily created by setProperty(); null while no property has been set
54 public Hashtable alignmentProperties;
// Fills this alignment from an array of sequences.
// NOTE(review): braces, the declaration of i and the statements guarded by the
// isNucleotide test are not visible in this listing.
56 private void initAlignment(SequenceI[] seqs)
// ask Comparison whether the input looks like nucleotide data; presumably `type` is
// set from the answer - confirm against the full file
60 if (jalview.util.Comparison.isNucleotide(seqs))
// start from an empty Vector and copy the input across in array order
69 sequences = new Vector();
71 for (i = 0; i < seqs.length; i++)
73 sequences.addElement(seqs[i]);
79 * Make an alignment from an array of Sequences.
// Constructor taking the sequences that form the alignment.
// NOTE(review): the constructor body is not visible in this listing; presumably it
// hands seqs to initAlignment - confirm.
83 public Alignment(SequenceI[] seqs)
89 * Make a new alignment from an array of SeqCigars
// Constructor that builds the alignment from SeqCigar objects.
94 public Alignment(SeqCigar[] alseqs)
// expand the cigars into sequences using this alignment's gap character; the
// ColumnSelection passed in is a fresh object that is not kept
96 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
97 gapCharacter, new ColumnSelection(), null);
// NOTE(review): the statement that consumes `seqs` is not visible in this listing
102 * Make a new alignment from a CigarArray. JBPNote - can only do this when
103 * compactAlignment does not contain hidden regions. JBPNote - must also check
104 * that compactAlignment resolves to a set of SeqCigars - or construct them
107 * @param compactAlignment
110 public static AlignmentI createAlignment(CigarArray compactAlignment)
112 throw new Error("Alignment(CigarArray) not yet implemented");
113 // this(compactAlignment.refCigars);
119 * @return DOCUMENT ME!
// Accessor for the alignment's sequences as a Vector.
// NOTE(review): the body is not visible in this listing; presumably it returns the
// `sequences` field itself rather than a copy - confirm.
121 public Vector getSequences()
// Copies the alignment's sequences into a newly allocated SequenceI array, in Vector order.
126 public SequenceI[] getSequencesArray()
// NOTE(review): the statement guarded by this null check is not visible (presumably `return null;`)
128 if (sequences == null)
130 SequenceI[] reply = new SequenceI[sequences.size()];
131 for (int i = 0; i < sequences.size(); i++)
133 reply[i] = (SequenceI) sequences.elementAt(i);
// NOTE(review): the return statement is not visible in this listing (presumably `return reply;`)
144 * @return DOCUMENT ME!
// Looks up the sequence stored at row i.
146 public SequenceI getSequenceAt(int i)
// only indices 0 .. size-1 reach the Vector lookup
148 if (i>-1 && i < sequences.size())
150 return (SequenceI) sequences.elementAt(i);
// NOTE(review): the result for an out-of-range index is not visible here (presumably null)
157 * Adds a sequence to the alignment. Recalculates maxLength and size.
// Appends snew to the alignment and registers a dataset sequence for it with the dataset alignment.
161 public void addSequence(SequenceI snew)
// NOTE(review): the condition that enables this dataset section is not visible in
// this listing (presumably a check that a dataset exists)
165 // maintain dataset integrity
166 if (snew.getDatasetSequence() != null)
168 getDataset().addSequence(snew.getDatasetSequence());
// alternative branch (its `else` is not visible): derive a sequence from snew and add
// that derived sequence's dataset sequence; snew itself is still what gets appended below
172 // derive new sequence
173 SequenceI adding = snew.deriveSequence();
174 getDataset().addSequence(adding.getDatasetSequence());
// no Vector yet: let initAlignment create it from a one-element array
178 if (sequences == null)
180 initAlignment(new SequenceI[]
// otherwise (the `else` is not visible) append to the existing Vector
185 sequences.addElement(snew);
// hiddenSequences can be null after finalize(), hence the guard
187 if (hiddenSequences != null)
188 hiddenSequences.adjustHeightSequenceAdded();
192 * Replaces the sequence at position i of the alignment with snew.
196 public void setSequenceAt(int i, SequenceI snew)
198 SequenceI oldseq = getSequenceAt(i);
199 deleteSequence(oldseq);
201 sequences.setElementAt(snew, i);
207 * @return DOCUMENT ME!
// Accessor for the groups defined on this alignment, as a Vector.
// NOTE(review): the body is not visible in this listing; presumably it returns the
// `groups` field - confirm.
209 public Vector getGroups()
// Releases references held by this alignment.
214 public void finalize()
// tell the dataset alignment that one fewer alignment refers to it
216 if (getDataset() != null)
217 getDataset().removeAlignmentRef();
// NOTE(review): original lines 218-222 are not visible in this listing
// after this point any code touching hiddenSequences must null-check it
223 hiddenSequences = null;
227 * decrement the alignmentRefs counter by one and call finalize if it goes to zero
// Drops one reference to this alignment.
230 private void removeAlignmentRef()
// pre-decrement, so the branch is taken when the count reaches exactly zero
232 if (--alignmentRefs == 0)
// NOTE(review): the guarded statement is not visible in this listing
244 public void deleteSequence(SequenceI s)
246 deleteSequence(findIndex(s));
255 public void deleteSequence(int i)
257 if (i > -1 && i < getHeight())
259 sequences.removeElementAt(i);
260 hiddenSequences.adjustHeightSequenceDeleted(i);
// Walks the groups in order looking for one whose sequences contain s.
265 public SequenceGroup findGroup(SequenceI s)
267 for (int i = 0; i < this.groups.size(); i++)
269 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
271 if (sg.getSequences(null).contains(s))
// NOTE(review): the statement returning the match and the not-found result are not
// visible in this listing
286 * @return DOCUMENT ME!
// Collects every group whose sequences contain s and returns them as an array.
288 public SequenceGroup[] findAllGroups(SequenceI s)
290 Vector temp = new Vector();
// the group count is sampled once, before the loop
292 int gSize = groups.size();
293 for (int i = 0; i < gSize; i++)
295 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
// a null entry or a group without a sequence list is deleted from the alignment
296 if (sg == null || sg.getSequences(null) == null)
// NOTE(review): this removes an element from `groups` while the loop indexes it; any
// adjustment of i / gSize is on lines not visible here - verify that the entry
// following a deleted one is not skipped. Also note sg may be null at this call.
298 this.deleteGroup(sg);
303 if (sg.getSequences(null).contains(s))
// NOTE(review): the statement that adds sg to temp is not visible in this listing
// copy the matches into a typed array
309 SequenceGroup[] ret = new SequenceGroup[temp.size()];
311 for (int i = 0; i < temp.size(); i++)
313 ret[i] = (SequenceGroup) temp.elementAt(i);
// NOTE(review): the return statement is not visible (presumably `return ret;`)
// Registers sg with this alignment unless it is already present.
320 public void addGroup(SequenceGroup sg)
322 if (!groups.contains(sg))
// only when hiddenSequences reports a non-zero size: drop from sg every member that
// is not in this alignment's `sequences` Vector
324 if (hiddenSequences.getSize() > 0)
326 int i, iSize = sg.getSize();
327 for (i = 0; i < iSize; i++)
329 if (!sequences.contains(sg.getSequenceAt(i)))
// NOTE(review): deleting inside an index loop requires i / iSize to be adjusted; any
// such statements are on lines not visible in this listing - verify
331 sg.deleteSequence(sg.getSequenceAt(i), false);
// NOTE(review): the statement taken for a group left empty is not visible here
// (presumably an early return so that empty groups are not added)
337 if (sg.getSize() < 1)
343 groups.addElement(sg);
348 * remove any annotation that references gp
351 * (if null, removes all group associated annotation)
// Removes group-associated annotation rows from `annotations`.
353 private void removeAnnotationForGroup(SequenceGroup gp)
// nothing to do when there is no annotation
355 if (annotations == null || annotations.length == 0)
359 // remove annotation very quickly
// two scratch arrays, each sized for the worst case (all rows deleted / all rows kept)
360 AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length];
// NOTE(review): the declarations of i, p, k and the test choosing between the two
// loops below are not visible in this listing (presumably `gp == null` selects the first)
// variant 1: every row that has any groupRef is marked for deletion
364 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
366 if (annotations[i].groupRef != null)
368 todelete[p++] = annotations[i];
372 tokeep[k++] = annotations[i];
// variant 2: only rows whose groupRef is gp itself (identity comparison) are marked
378 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
380 if (annotations[i].groupRef == gp)
382 todelete[p++] = annotations[i];
386 tokeep[k++] = annotations[i];
392 // clear out the group associated annotation.
393 for (i = 0; i < p; i++)
395 unhookAnnotation(todelete[i]);
// array sized for exactly the k surviving rows
398 t = new AlignmentAnnotation[k];
399 for (i = 0; i < k; i++)
// NOTE(review): the copy statement and the assignment back to `annotations` are not
// visible in this listing
407 public void deleteAllGroups()
409 if (annotations != null)
411 removeAnnotationForGroup(null);
413 groups.removeAllElements();
417 public void deleteGroup(SequenceGroup g)
419 if (groups.contains(g))
421 removeAnnotationForGroup(g);
422 groups.removeElement(g);
427 public SequenceI findName(String name)
429 return findName(name, false);
435 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
437 public SequenceI findName(String token, boolean b)
439 return findName(null, token, b);
445 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
// Finds a sequence by name, optionally resuming the search after a given sequence.
448 public SequenceI findName(SequenceI startAfter, String token, boolean b)
// NOTE(review): the declarations of i and sq are not visible in this listing
453 String sqname = null;
454 if (startAfter != null)
456 // try to find the sequence in the alignment
457 boolean matched = false;
458 while (i < sequences.size())
// identity comparison; the post-increment leaves i on the row after startAfter
460 if (getSequenceAt(i++) == startAfter)
// NOTE(review): how `matched` is set and used is on lines not visible here
// scan the remaining rows
471 while (i < sequences.size())
473 sq = getSequenceAt(i);
474 sqname = sq.getName();
475 if (sqname.equals(token) // exact match
476 || (b && // allow imperfect matches - case varies
477 (sqname.equalsIgnoreCase(token))))
479 return getSequenceAt(i);
// NOTE(review): the increment of i and the not-found return are not visible in this listing
// Collects every sequence whose name equals `name` exactly (case-sensitive) into an array.
488 public SequenceI[] findSequenceMatch(String name)
490 Vector matches = new Vector();
// NOTE(review): the declaration of i and its increment inside the loop are not
// visible in this listing
493 while (i < sequences.size())
495 if (getSequenceAt(i).getName().equals(name))
497 matches.addElement(getSequenceAt(i));
// copy the matches into a typed array
502 SequenceI[] result = new SequenceI[matches.size()];
503 for (i = 0; i < result.length; i++)
505 result[i] = (SequenceI) matches.elementAt(i);
// NOTE(review): the return statement is not visible (presumably `return result;`)
515 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
// Finds the row index of sequence s.
517 public int findIndex(SequenceI s)
// NOTE(review): the declaration of i, its increment and both return statements are
// not visible in this listing
521 while (i < sequences.size())
// identity comparison, not equals()
523 if (s == getSequenceAt(i))
538 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
// Finds the index of a row whose sequence is involved in the given search results.
540 public int findIndex(SearchResults results)
// NOTE(review): the declaration of i, its increment and both return statements are
// not visible in this listing
544 while (i < sequences.size())
546 if (results.involvesSequence(getSequenceAt(i)))
558 * @return DOCUMENT ME!
560 public int getHeight()
562 return sequences.size();
568 * @return DOCUMENT ME!
// Scans all rows and tracks the greatest sequence length seen.
570 public int getWidth()
// NOTE(review): the declaration / initial value of maxLength and the return
// statement are not visible in this listing
574 for (int i = 0; i < sequences.size(); i++)
576 if (getSequenceAt(i).getLength() > maxLength)
578 maxLength = getSequenceAt(i).getLength();
// Rewrites every sequence so that its gap symbols become gc.
591 public void setGapCharacter(char gc)
// NOTE(review): the assignment to the gapCharacter field is not visible in this listing
595 for (int i = 0; i < sequences.size(); i++)
// NOTE(review): the cast requires every element to be a concrete Sequence; another
// SequenceI implementation would raise ClassCastException here
597 Sequence seq = (Sequence) sequences.elementAt(i);
// '.', '-' and ' ' are all treated as gap symbols and replaced by gc
598 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
599 .replace('-', gc).replace(' ', gc));
606 * @return DOCUMENT ME!
// Accessor for the alignment's gap character.
// NOTE(review): the body is not visible in this listing; presumably it returns the
// `gapCharacter` field - confirm.
608 public char getGapCharacter()
616 * @see jalview.datamodel.AlignmentI#isAligned()
618 public boolean isAligned()
620 return isAligned(false);
626 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
// Compares the length of each checked sequence with the alignment width.
628 public boolean isAligned(boolean includeHidden)
630 int width = getWidth();
// with nothing hidden the flag makes no difference, so it is forced on and the
// isHidden lookup below is short-circuited
631 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
633 includeHidden = true; // no hidden sequences to check against.
635 for (int i = 0; i < sequences.size(); i++)
637 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
639 if (getSequenceAt(i).getLength() != width)
// NOTE(review): both return statements are not visible in this listing (presumably
// false on the first length mismatch, true otherwise)
652 * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
653 * AlignmentAnnotation)
655 public boolean deleteAnnotation(AlignmentAnnotation aa)
657 return deleteAnnotation(aa, true);
// Removes annotation row aa from `annotations` by copying the other rows into a
// shorter array.
660 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
// NOTE(review): the declarations of aSize and tIndex, an early-exit test and the
// final return are on lines not visible in this listing
664 if (annotations != null)
666 aSize = annotations.length;
// room for every row except the one being removed
674 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
676 boolean swap = false;
679 for (int i = 0; i < aSize; i++)
// identity comparison; the statements taken on a match are not visible here
681 if (annotations[i] == aa)
// the bounds test keeps the copy inside temp when aa was not found
686 if (tIndex < temp.length)
687 temp[tIndex++] = annotations[i];
// NOTE(review): the conditions guarding this call (presumably swap and unhook) are
// not visible in this listing
694 unhookAnnotation(aa);
701 * remove any object references associated with this annotation
// Detaches aa from the objects that refer to it.
705 private void unhookAnnotation(AlignmentAnnotation aa)
// sequence-associated annotation: remove it from its sequence
707 if (aa.sequenceRef != null)
709 aa.sequenceRef.removeAlignmentAnnotation(aa);
// group-associated annotation: the statement in this branch is not visible in this listing
711 if (aa.groupRef != null)
713 // probably need to do more here in the future (post 2.5.0)
721 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
722 * AlignmentAnnotation)
724 public void addAnnotation(AlignmentAnnotation aa)
726 addAnnotation(aa, -1);
732 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
733 * AlignmentAnnotation, int)
// Inserts annotation row aa into `annotations` by building a longer array.
735 public void addAnnotation(AlignmentAnnotation aa, int pos)
// remember that at least one annotation carrying an RNA structure has been added
737 if(aa.getRNAStruc()!= null){
738 hasRNAStructure=true;
// NOTE(review): the declaration of aSize (and of i, p further down) is not visible
// in this listing
742 if (annotations != null)
744 aSize = annotations.length + 1;
747 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
// pos of -1, or any pos at or beyond the new length, places aa in the last slot
749 if (pos == -1 || pos >= aSize)
751 temp[aSize - 1] = aa;
// copy the existing rows; i walks the old array and p the new one
// NOTE(review): the statements that place aa at pos and the final assignment to
// `annotations` are not visible in this listing
760 for (i = 0; i < (aSize - 1); i++, p++)
768 temp[p] = annotations[i];
// Moves annotation row aa to position `index` by rebuilding the array.
776 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
// reject a null row, a missing annotation array, or an index past the last row; the
// guarded statement is not visible (presumably an early return)
778 if (aa == null || annotations == null || annotations.length - 1 < index)
783 int aSize = annotations.length;
784 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
788 for (int i = 0; i < aSize; i++)
// NOTE(review): the conditions choosing between the two copies below, and the
// assignment back to `annotations`, are on lines not visible in this listing
797 temp[i] = annotations[i];
801 temp[i] = annotations[i - 1];
811 * @return DOCUMENT ME!
// Accessor for the alignment's annotation rows.
// NOTE(review): the body is not visible in this listing; presumably it returns the
// `annotations` array itself - confirm.
813 public AlignmentAnnotation[] getAlignmentAnnotation()
// Setter for the nucleotide / protein state of the alignment.
// NOTE(review): the body is not visible in this listing; presumably it sets `type`
// to NUCLEOTIDE or PROTEIN according to b - confirm.
818 public void setNucleotide(boolean b)
// Reports whether the alignment's type is NUCLEOTIDE.
830 public boolean isNucleotide()
// NOTE(review): the return statements of both branches are not visible in this listing
832 if (type == NUCLEOTIDE)
842 public boolean hasRNAStructure(){
843 //TODO can it happen that structure is removed from alignment?
844 return hasRNAStructure;
// Establishes the dataset alignment for this alignment.
847 public void setDataset(Alignment data)
// case 1: no dataset yet and none supplied - build one from this alignment's rows
849 if (dataset == null && data == null)
851 // Create a new dataset for this alignment.
852 // Can only be done once, if dataset is not null
853 // This will not be performed
854 SequenceI[] seqs = new SequenceI[getHeight()];
855 SequenceI currentSeq;
856 for (int i = 0; i < getHeight(); i++)
858 currentSeq = getSequenceAt(i);
// reuse an existing dataset sequence where the row already has one
859 if (currentSeq.getDatasetSequence() != null)
// NOTE(review): seqs is a SequenceI[]; the cast to the concrete Sequence class looks
// unnecessary and would throw ClassCastException for other implementations
861 seqs[i] = (Sequence) currentSeq.getDatasetSequence();
// otherwise (the `else` is not visible) ask the row to create one
865 seqs[i] = currentSeq.createDatasetSequence();
869 dataset = new Alignment(seqs);
// case 2: no dataset yet but one was supplied; the statement in this branch is not
// visible in this listing (presumably `dataset = data;`)
871 else if (dataset == null && data != null)
// record that this alignment refers to the dataset
875 dataset.addAlignmentRef();
879 * reference count for number of alignments referencing this one.
// decremented in removeAlignmentRef(); starts at zero
881 int alignmentRefs = 0;
884 * increase reference count to this alignment.
// NOTE(review): the body is not visible in this listing; presumably it increments
// alignmentRefs, mirroring removeAlignmentRef() - confirm.
886 private void addAlignmentRef()
// Accessor for the dataset alignment.
// NOTE(review): the body is not visible in this listing; presumably it returns the
// `dataset` field, which may be null - confirm.
891 public Alignment getDataset()
// Brings every sequence to a common length by appending gap characters or trimming the end.
896 public boolean padGaps()
898 boolean modified = false;
// NOTE(review): the declarations of maxLength, current and cLength, the updates of
// maxLength / modified and the return statement are on lines not visible in this listing
900 // Remove excess gaps from the end of alignment
// pass 1: walk each sequence backwards from its end while j is above maxLength and
// test for non-gap characters (the first half of the condition is not visible)
904 for (int i = 0; i < sequences.size(); i++)
906 current = getSequenceAt(i);
907 for (int j = current.getLength(); j > maxLength; j--)
910 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
// pass 2: make every sequence exactly maxLength long
921 for (int i = 0; i < sequences.size(); i++)
923 current = getSequenceAt(i);
924 cLength = current.getLength();
// too short: append the missing number of gap characters at the end
926 if (cLength < maxLength)
928 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
// too long: cut everything from maxLength onwards
931 else if (current.getLength() > maxLength)
933 current.deleteChars(maxLength, current.getLength());
940 * Justify the sequences to the left or right by deleting and inserting gaps
941 * before the initial residue or after the terminal residue
944 * true if alignment padded to right, false to justify to left
945 * @return true if alignment was changed
// Shifts each sequence left or right by editing the gaps flanking it.
947 public boolean justify(boolean right)
949 boolean modified = false;
// NOTE(review): the declarations of maxLength and current, the updates of `modified`,
// the branch on `right` and the return statement are on lines not visible in this listing
951 // Remove excess gaps from the end of alignment
// ends[2*i] and ends[2*i+1] hold two column positions for row i
953 int ends[] = new int[sequences.size() * 2];
955 for (int i = 0; i < sequences.size(); i++)
957 current = getSequenceAt(i);
958 // This should really be a sequence method
959 ends[i * 2] = current.findIndex(current.getStart());
960 ends[i * 2 + 1] = current.findIndex(current.getStart()
961 + current.getLength());
962 boolean hitres = false;
// scan the row for non-gap characters; the statements that use hitres and rs are
// not visible here
963 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
965 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
// keep the largest distance from the stored start position seen on any row
975 if (j - ends[i * 2] > maxLength)
977 maxLength = j - ends[i * 2];
985 // now edit the flanking gaps to justify to either left or right
986 int cLength, extent, diff;
987 for (int i = 0; i < sequences.size(); i++)
989 current = getSequenceAt(i);
991 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
992 diff = maxLength - cLength; // number of gaps to indent
993 extent = current.getLength();
// NOTE(review): presumably the statements down to the insertCharAt(0, ...) call form
// the right-justify branch and the rest the left-justify branch - confirm
// drop whatever follows the stored end position
997 if (extent > ends[i * 2 + 1])
999 current.deleteChars(ends[i * 2 + 1] + 1, extent);
// more leading columns than the required indent: delete the surplus
1002 if (ends[i * 2] > diff)
1004 current.deleteChars(0, ends[i * 2] - diff);
// fewer leading columns than the required indent: insert gap characters at the front
1009 if (ends[i * 2] < diff)
1011 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
// remove all leading columns, then rebase the stored end and the extent
1019 if (ends[i * 2] > 0)
1021 current.deleteChars(0, ends[i * 2]);
1023 ends[i * 2 + 1] -= ends[i * 2];
1024 extent -= ends[i * 2];
// trim a row that is still longer than maxLength
1026 if (extent > maxLength)
1028 current.deleteChars(maxLength + 1, extent);
// pad a shorter row at its end with gap characters
1033 if (extent < maxLength)
1035 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1044 public HiddenSequences getHiddenSequences()
1046 return hiddenSequences;
// Builds a CigarArray over one SeqCigar per row of the alignment.
1049 public CigarArray getCompactAlignment()
1051 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1052 for (int i = 0; i < sequences.size(); i++)
1054 alseqs[i] = new SeqCigar((SequenceI) sequences.elementAt(i));
1056 CigarArray cal = new CigarArray(alseqs);
// a single M operation spanning the full alignment width
1057 cal.addOperation(CigarArray.M, getWidth());
// NOTE(review): the return statement is not visible in this listing (presumably `return cal;`)
1061 public void setProperty(Object key, Object value)
1063 if (alignmentProperties == null)
1064 alignmentProperties = new Hashtable();
1066 alignmentProperties.put(key, value);
// Looks up a property previously stored with setProperty.
1069 public Object getProperty(Object key)
// the table only exists once a property has been set
1071 if (alignmentProperties != null)
1072 return alignmentProperties.get(key);
// NOTE(review): the result when no table exists is not visible in this listing (presumably null)
1077 public Hashtable getProperties()
1079 return alignmentProperties;
// codon frames registered through addCodonFrame(); null until the first one is added
1082 AlignedCodonFrame[] codonFrameList = null;
1088 * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame
// Appends `codons` to the codon frame list, growing the array by one.
1091 public void addCodonFrame(AlignedCodonFrame codons)
// NOTE(review): original lines 1092-1094 are not visible in this listing (possibly a
// null check on codons)
// first frame: start a new array; the initializer and the early exit are not visible
1095 if (codonFrameList == null)
1097 codonFrameList = new AlignedCodonFrame[]
// otherwise copy the existing frames into an array one longer and put codons last
1101 AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1];
1102 System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length);
1103 t[codonFrameList.length] = codons;
// NOTE(review): the assignment of t back to codonFrameList is not visible in this listing
1110 * @see jalview.datamodel.AlignmentI#getCodonFrame(int)
1112 public AlignedCodonFrame getCodonFrame(int index)
1114 return codonFrameList[index];
1121 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
// Collects the codon frames that involve seq into an array.
1123 public AlignedCodonFrame[] getCodonFrame(SequenceI seq)
// NOTE(review): the statement guarded by this test is not visible (presumably `return null;`)
1125 if (seq == null || codonFrameList == null)
1127 Vector cframes = new Vector();
1128 for (int f = 0; f < codonFrameList.length; f++)
1130 if (codonFrameList[f].involvesSequence(seq))
1131 cframes.addElement(codonFrameList[f]);
// NOTE(review): the statement taken when nothing matched is not visible (presumably `return null;`)
1133 if (cframes.size() == 0)
1135 AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()];
1136 cframes.copyInto(cfr);
// NOTE(review): the return statement is not visible in this listing (presumably `return cfr;`)
1143 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1145 public AlignedCodonFrame[] getCodonFrames()
1147 return codonFrameList;
1153 * @see jalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel.
1154 * AlignedCodonFrame)
// Removes `codons` from the codon frame list.
1156 public boolean removeCodonFrame(AlignedCodonFrame codons)
// NOTE(review): the statement guarded by this test is not visible (presumably `return false;`)
1158 if (codons == null || codonFrameList == null)
1160 boolean removed = false;
1161 int i = 0, iSize = codonFrameList.length;
// NOTE(review): the loop header, the updates of removed / i / iSize and the return
// statement are on lines not visible in this listing
// identity comparison against the frame being removed
1164 if (codonFrameList[i] == codons)
// shift the frames after position i down by one slot, within the same array; the
// length argument continues on a line that is not visible
1169 System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize
1182 public void append(AlignmentI toappend)
1184 // TODO test this method for a future 2.5 release
1185 // currently tested for use in jalview.gui.SequenceFetcher
1186 boolean samegap = toappend.getGapCharacter() == getGapCharacter();
1187 char oldc = toappend.getGapCharacter();
1188 boolean hashidden = toappend.getHiddenSequences() != null
1189 && toappend.getHiddenSequences().hiddenSequences != null;
1190 // get all sequences including any hidden ones
1191 Vector sqs = (hashidden) ? toappend.getHiddenSequences()
1192 .getFullAlignment().getSequences() : toappend.getSequences();
1195 Enumeration sq = sqs.elements();
1196 while (sq.hasMoreElements())
1198 SequenceI addedsq = (SequenceI) sq.nextElement();
1201 char[] oldseq = addedsq.getSequence();
1202 for (int c = 0; c < oldseq.length; c++)
1204 if (oldseq[c] == oldc)
1206 oldseq[c] = gapCharacter;
1210 addSequence(addedsq);
1213 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1214 for (int a = 0; alan != null && a < alan.length; a++)
1216 addAnnotation(alan[a]);
1218 AlignedCodonFrame[] acod = toappend.getCodonFrames();
1219 for (int a = 0; acod != null && a < acod.length; a++)
1221 this.addCodonFrame(acod[a]);
1223 Vector sg = toappend.getGroups();
1226 Enumeration el = sg.elements();
1227 while (el.hasMoreElements())
1229 addGroup((SequenceGroup) el.nextElement());
1232 if (toappend.getHiddenSequences() != null)
1234 HiddenSequences hs = toappend.getHiddenSequences();
1235 if (hiddenSequences == null)
1237 hiddenSequences = new HiddenSequences(this);
1239 if (hs.hiddenSequences != null)
1241 for (int s = 0; s < hs.hiddenSequences.length; s++)
1243 // hide the newly appended sequence in the alignment
1244 if (hs.hiddenSequences[s] != null)
1246 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1251 if (toappend.getProperties() != null)
1253 // we really can't do very much here - just try to concatenate strings
1254 // where property collisions occur.
1255 Enumeration key = toappend.getProperties().keys();
1256 while (key.hasMoreElements())
1258 Object k = key.nextElement();
1259 Object ourval = this.getProperty(k);
1260 Object toapprop = toappend.getProperty(k);
1263 if (ourval.getClass().equals(toapprop.getClass())
1264 && !ourval.equals(toapprop))
1266 if (ourval instanceof String)
1269 this.setProperty(k, ((String) ourval) + "; "
1270 + ((String) toapprop));
1274 if (ourval instanceof Vector)
1277 Enumeration theirv = ((Vector) toapprop).elements();
1278 while (theirv.hasMoreElements())
1280 ((Vector) ourval).addElement(theirv);
1288 // just add new property directly
1289 setProperty(k, toapprop);