2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
18 package jalview.datamodel;
22 import jalview.analysis.*;
25 * Data structure to hold and manipulate a multiple sequence alignment
31 public class Alignment implements AlignmentI
// Dataset alignment for this alignment; assigned in setDataset(Alignment).
33 protected Alignment dataset;
// Sequences held by this alignment; created in initAlignment(SequenceI[]).
35 protected Vector sequences;
// Sequence groups defined on this alignment (elements are cast to
// SequenceGroup when read).
37 protected Vector groups = new Vector();
// Gap symbol, '-' by default; used when padding sequences with gaps.
39 protected char gapCharacter = '-';
// Alignment type, compared against NUCLEOTIDE in isNucleotide(); starts as
// NUCLEOTIDE.
41 protected int type = NUCLEOTIDE;
// Type constant for protein alignments.
43 public static final int PROTEIN = 0;
// Type constant for nucleotide alignments.
45 public static final int NUCLEOTIDE = 1;
// Set to true by addAnnotation when an added annotation has a non-null
// getRNAStruc(); nothing visible here ever resets it.
47 public boolean hasRNAStructure = false;
// Annotation rows. There is no initialiser, so this is null until an
// annotation is added; readers null-check it.
50 public AlignmentAnnotation[] annotations;
// Hidden-sequence bookkeeping bound to this alignment.
52 HiddenSequences hiddenSequences = new HiddenSequences(this);
// Arbitrary alignment properties; created on the first setProperty call.
54 public Hashtable alignmentProperties;
56 private void initAlignment(SequenceI[] seqs)
60 if (jalview.util.Comparison.isNucleotide(seqs))
69 sequences = new Vector();
71 for (i = 0; i < seqs.length; i++)
73 sequences.addElement(seqs[i]);
// Constructor taking the sequences that form the alignment.
// NOTE(review): the constructor body is not visible in this listing;
// presumably it delegates to initAlignment(seqs) - confirm.
79 * Make an alignment from an array of Sequences.
83 public Alignment(SequenceI[] seqs)
89 * Make a new alignment from an array of SeqCigars
94 public Alignment(SeqCigar[] alseqs)
96 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
97 gapCharacter, new ColumnSelection(), null);
102 * Make a new alignment from an CigarArray JBPNote - can only do this when
103 * compactAlignment does not contain hidden regions. JBPNote - must also check
104 * that compactAlignment resolves to a set of SeqCigars - or construct them
107 * @param compactAlignment
110 public static AlignmentI createAlignment(CigarArray compactAlignment)
112 throw new Error("Alignment(CigarArray) not yet implemented");
113 // this(compactAlignment.refCigars);
// Accessor for the alignment's sequences as a raw Vector.
// NOTE(review): the method body is not visible in this listing; presumably
// it returns the sequences field - confirm.
119 * @return DOCUMENT ME!
121 public Vector getSequences()
// Typed variant of getSequences() taking a map of hidden representatives.
// Per the TODO below, hiddenReps is not used yet.
// NOTE(review): the return statement is not visible in this listing;
// presumably it returns the sequences field - confirm.
126 public List<SequenceI> getSequences(
127 Map<SequenceI, SequenceCollectionI> hiddenReps)
129 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to work on this.
133 public SequenceI[] getSequencesArray()
135 if (sequences == null)
137 SequenceI[] reply = new SequenceI[sequences.size()];
138 for (int i = 0; i < sequences.size(); i++)
140 reply[i] = (SequenceI) sequences.elementAt(i);
151 * @return DOCUMENT ME!
153 public SequenceI getSequenceAt(int i)
155 if (i>-1 && i < sequences.size())
157 return (SequenceI) sequences.elementAt(i);
164 * Adds a sequence to the alignment. Recalculates maxLength and size.
168 public void addSequence(SequenceI snew)
172 // maintain dataset integrity
173 if (snew.getDatasetSequence() != null)
175 getDataset().addSequence(snew.getDatasetSequence());
179 // derive new sequence
180 SequenceI adding = snew.deriveSequence();
181 getDataset().addSequence(adding.getDatasetSequence());
185 if (sequences == null)
187 initAlignment(new SequenceI[]
192 sequences.addElement(snew);
194 if (hiddenSequences != null)
195 hiddenSequences.adjustHeightSequenceAdded();
199 * Adds a sequence to the alignment. Recalculates maxLength and size.
203 public void setSequenceAt(int i, SequenceI snew)
205 SequenceI oldseq = getSequenceAt(i);
206 deleteSequence(oldseq);
208 sequences.setElementAt(snew, i);
// Accessor for the sequence groups defined on this alignment.
// NOTE(review): the method body is not visible in this listing; presumably
// it returns the groups field - confirm.
214 * @return DOCUMENT ME!
216 public Vector getGroups()
221 public void finalize()
223 if (getDataset() != null)
224 getDataset().removeAlignmentRef();
230 hiddenSequences = null;
234 * decrement the alignmentRefs counter by one and call finalize if it goes to
237 private void removeAlignmentRef()
239 if (--alignmentRefs == 0)
251 public void deleteSequence(SequenceI s)
253 deleteSequence(findIndex(s));
262 public void deleteSequence(int i)
264 if (i > -1 && i < getHeight())
266 sequences.removeElementAt(i);
267 hiddenSequences.adjustHeightSequenceDeleted(i);
272 public SequenceGroup findGroup(SequenceI s)
274 for (int i = 0; i < this.groups.size(); i++)
276 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
278 if (sg.getSequences(null).contains(s))
293 * @return DOCUMENT ME!
295 public SequenceGroup[] findAllGroups(SequenceI s)
297 Vector temp = new Vector();
299 int gSize = groups.size();
300 for (int i = 0; i < gSize; i++)
302 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
303 if (sg == null || sg.getSequences(null) == null)
305 this.deleteGroup(sg);
310 if (sg.getSequences(null).contains(s))
316 SequenceGroup[] ret = new SequenceGroup[temp.size()];
318 for (int i = 0; i < temp.size(); i++)
320 ret[i] = (SequenceGroup) temp.elementAt(i);
327 public void addGroup(SequenceGroup sg)
329 if (!groups.contains(sg))
331 if (hiddenSequences.getSize() > 0)
333 int i, iSize = sg.getSize();
334 for (i = 0; i < iSize; i++)
336 if (!sequences.contains(sg.getSequenceAt(i)))
338 sg.deleteSequence(sg.getSequenceAt(i), false);
344 if (sg.getSize() < 1)
350 groups.addElement(sg);
355 * remove any annotation that references gp
358 * (if null, removes all group associated annotation)
360 private void removeAnnotationForGroup(SequenceGroup gp)
362 if (annotations == null || annotations.length == 0)
366 // remove annotation very quickly
367 AlignmentAnnotation[] t, todelete = new AlignmentAnnotation[annotations.length], tokeep = new AlignmentAnnotation[annotations.length];
371 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
373 if (annotations[i].groupRef != null)
375 todelete[p++] = annotations[i];
379 tokeep[k++] = annotations[i];
385 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
387 if (annotations[i].groupRef == gp)
389 todelete[p++] = annotations[i];
393 tokeep[k++] = annotations[i];
399 // clear out the group associated annotation.
400 for (i = 0; i < p; i++)
402 unhookAnnotation(todelete[i]);
405 t = new AlignmentAnnotation[k];
406 for (i = 0; i < k; i++)
414 public void deleteAllGroups()
416 if (annotations != null)
418 removeAnnotationForGroup(null);
420 groups.removeAllElements();
424 public void deleteGroup(SequenceGroup g)
426 if (groups.contains(g))
428 removeAnnotationForGroup(g);
429 groups.removeElement(g);
434 public SequenceI findName(String name)
436 return findName(name, false);
442 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
444 public SequenceI findName(String token, boolean b)
446 return findName(null, token, b);
452 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
455 public SequenceI findName(SequenceI startAfter, String token, boolean b)
460 String sqname = null;
461 if (startAfter != null)
463 // try to find the sequence in the alignment
464 boolean matched = false;
465 while (i < sequences.size())
467 if (getSequenceAt(i++) == startAfter)
478 while (i < sequences.size())
480 sq = getSequenceAt(i);
481 sqname = sq.getName();
482 if (sqname.equals(token) // exact match
483 || (b && // allow imperfect matches - case varies
484 (sqname.equalsIgnoreCase(token))))
486 return getSequenceAt(i);
495 public SequenceI[] findSequenceMatch(String name)
497 Vector matches = new Vector();
500 while (i < sequences.size())
502 if (getSequenceAt(i).getName().equals(name))
504 matches.addElement(getSequenceAt(i));
509 SequenceI[] result = new SequenceI[matches.size()];
510 for (i = 0; i < result.length; i++)
512 result[i] = (SequenceI) matches.elementAt(i);
522 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
524 public int findIndex(SequenceI s)
528 while (i < sequences.size())
530 if (s == getSequenceAt(i))
545 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
547 public int findIndex(SearchResults results)
551 while (i < sequences.size())
553 if (results.involvesSequence(getSequenceAt(i)))
565 * @return DOCUMENT ME!
567 public int getHeight()
569 return sequences.size();
575 * @return DOCUMENT ME!
577 public int getWidth()
581 for (int i = 0; i < sequences.size(); i++)
583 if (getSequenceAt(i).getLength() > maxLength)
585 maxLength = getSequenceAt(i).getLength();
598 public void setGapCharacter(char gc)
602 for (int i = 0; i < sequences.size(); i++)
604 Sequence seq = (Sequence) sequences.elementAt(i);
605 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
606 .replace('-', gc).replace(' ', gc));
// Accessor for the alignment's gap character.
// NOTE(review): the method body is not visible in this listing; presumably
// it returns the gapCharacter field - confirm.
613 * @return DOCUMENT ME!
615 public char getGapCharacter()
623 * @see jalview.datamodel.AlignmentI#isAligned()
625 public boolean isAligned()
627 return isAligned(false);
633 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
635 public boolean isAligned(boolean includeHidden)
637 int width = getWidth();
638 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
640 includeHidden = true; // no hidden sequences to check against.
642 for (int i = 0; i < sequences.size(); i++)
644 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
646 if (getSequenceAt(i).getLength() != width)
659 * @seejalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
660 * AlignmentAnnotation)
662 public boolean deleteAnnotation(AlignmentAnnotation aa)
664 return deleteAnnotation(aa, true);
667 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
671 if (annotations != null)
673 aSize = annotations.length;
681 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
683 boolean swap = false;
686 for (int i = 0; i < aSize; i++)
688 if (annotations[i] == aa)
693 if (tIndex < temp.length)
694 temp[tIndex++] = annotations[i];
701 unhookAnnotation(aa);
708 * remove any object references associated with this annotation
712 private void unhookAnnotation(AlignmentAnnotation aa)
714 if (aa.sequenceRef != null)
716 aa.sequenceRef.removeAlignmentAnnotation(aa);
718 if (aa.groupRef != null)
720 // probably need to do more here in the future (post 2.5.0)
728 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
729 * AlignmentAnnotation)
731 public void addAnnotation(AlignmentAnnotation aa)
733 addAnnotation(aa, -1);
// Adds an annotation row at a given position by building a new array that
// is one element longer than the current one.
// NOTE(review): several short lines (declarations, braces, the else branch
// and the final assignment) are missing from this listing; comments cover
// only the visible statements.
739 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
740 * AlignmentAnnotation, int)
742 public void addAnnotation(AlignmentAnnotation aa, int pos)
// Remember that the alignment now carries RNA structure annotation.
744 if(aa.getRNAStruc()!= null){
745 hasRNAStructure=true;
// New size is the existing row count plus one for aa.
749 if (annotations != null)
751 aSize = annotations.length + 1;
754 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
// pos of -1, or a pos beyond the new array, means "append": aa goes into the
// last slot.
756 if (pos == -1 || pos >= aSize)
758 temp[aSize - 1] = aa;
// Copy the existing rows; i walks the old array and p the new one.
767 for (i = 0; i < (aSize - 1); i++, p++)
775 temp[p] = annotations[i];
// Repositions annotation aa at the given index by building a same-length
// copy of the annotation array.
// NOTE(review): the conditions selecting between the two copy statements,
// and the final assignment, are missing from this listing - confirm the
// exact semantics against the full source.
783 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
// Nothing to do without an annotation, without an annotation array, or when
// index lies beyond the last existing row.
785 if (aa == null || annotations == null || annotations.length - 1 < index)
790 int aSize = annotations.length;
791 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
795 for (int i = 0; i < aSize; i++)
// Row copied from the same position...
804 temp[i] = annotations[i];
// ...or from the position one before it.
808 temp[i] = annotations[i - 1];
// NOTE(review): the method body is not visible in this listing; presumably
// it returns the annotations field, which is null until an annotation has
// been added - confirm.
817 * returns all annotation on the alignment
819 public AlignmentAnnotation[] getAlignmentAnnotation()
// Setter for the alignment type flag.
// NOTE(review): the method body is not visible in this listing; presumably
// it sets type to NUCLEOTIDE when b is true and to PROTEIN otherwise -
// confirm.
824 public void setNucleotide(boolean b)
836 public boolean isNucleotide()
838 if (type == NUCLEOTIDE)
848 public boolean hasRNAStructure(){
849 //TODO can it happen that structure is removed from alignment?
850 return hasRNAStructure;
853 public void setDataset(Alignment data)
855 if (dataset == null && data == null)
857 // Create a new dataset for this alignment.
858 // Can only be done once, if dataset is not null
859 // This will not be performed
860 SequenceI[] seqs = new SequenceI[getHeight()];
861 SequenceI currentSeq;
862 for (int i = 0; i < getHeight(); i++)
864 currentSeq = getSequenceAt(i);
865 if (currentSeq.getDatasetSequence() != null)
867 seqs[i] = (Sequence) currentSeq.getDatasetSequence();
871 seqs[i] = currentSeq.createDatasetSequence();
875 dataset = new Alignment(seqs);
877 else if (dataset == null && data != null)
881 dataset.addAlignmentRef();
// Starts at zero; removeAlignmentRef() decrements it and calls finalize()
// when it reaches zero.
885 * reference count for number of alignments referencing this one.
887 int alignmentRefs = 0;
// Called on the dataset alignment by setDataset.
// NOTE(review): the method body is not visible in this listing; presumably
// it increments alignmentRefs - confirm.
890 * increase reference count to this alignment.
892 private void addAlignmentRef()
// Accessor for the dataset alignment attached to this alignment.
// NOTE(review): the method body is not visible in this listing; presumably
// it returns the dataset field (null when none is attached) - confirm.
897 public Alignment getDataset()
// Makes all sequences the same length: shorter ones are extended with the
// gap character and longer ones are truncated.
// NOTE(review): several short lines (declarations, assignments to maxLength
// and modified, braces, the return) are missing from this listing; comments
// cover only the visible statements.
902 public boolean padGaps()
904 boolean modified = false;
906 // Remove excess gaps from the end of alignment
// First pass: scan each sequence backwards from its end, looking for a
// non-gap character beyond the current maxLength.
910 for (int i = 0; i < sequences.size(); i++)
912 current = getSequenceAt(i);
913 for (int j = current.getLength(); j > maxLength; j--)
916 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
// Second pass: bring every sequence to maxLength.
927 for (int i = 0; i < sequences.size(); i++)
929 current = getSequenceAt(i);
930 cLength = current.getLength();
// Too short: append gap characters up to maxLength.
932 if (cLength < maxLength)
934 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
// Too long: cut everything from maxLength onwards.
937 else if (current.getLength() > maxLength)
939 current.deleteChars(maxLength, current.getLength());
// NOTE(review): many short lines (braces, else branches, simple assignments,
// the return statement) are missing from this listing, so the branch
// structure cannot be fully reconstructed here; comments cover only the
// visible statements.
946 * Justify the sequences to the left or right by deleting and inserting gaps
947 * before the initial residue or after the terminal residue
950 * true if alignment padded to right, false to justify to left
951 * @return true if alignment was changed
953 public boolean justify(boolean right)
955 boolean modified = false;
957 // Remove excess gaps from the end of alignment
// Two slots per sequence: ends[2*i] and ends[2*i + 1].
959 int ends[] = new int[sequences.size() * 2];
// First pass: record the two end values for every sequence and update
// maxLength.
961 for (int i = 0; i < sequences.size(); i++)
963 current = getSequenceAt(i);
964 // This should really be a sequence method
// Initial values come from findIndex at the sequence start and at
// start + length.
965 ends[i * 2] = current.findIndex(current.getStart());
966 ends[i * 2 + 1] = current.findIndex(current.getStart()
967 + current.getLength());
968 boolean hitres = false;
// Scan the sequence characters, testing each for non-gap.
969 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
971 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
// Track the largest distance between position j and the first end value.
981 if (j - ends[i * 2] > maxLength)
983 maxLength = j - ends[i * 2];
991 // now edit the flanking gaps to justify to either left or right
992 int cLength, extent, diff;
// Second pass: edit each sequence.
993 for (int i = 0; i < sequences.size(); i++)
995 current = getSequenceAt(i);
// Span between the two recorded end values, inclusive.
997 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
998 diff = maxLength - cLength; // number of gaps to indent
999 extent = current.getLength();
// Remove everything after the second end value.
1003 if (extent > ends[i * 2 + 1])
1005 current.deleteChars(ends[i * 2 + 1] + 1, extent);
// More leading characters than the required indent: delete the surplus.
1008 if (ends[i * 2] > diff)
1010 current.deleteChars(0, ends[i * 2] - diff);
// Fewer leading characters than the required indent: insert gaps at the
// front.
1015 if (ends[i * 2] < diff)
1017 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
// Remove all characters before the first end value and shift the recorded
// values accordingly.
1025 if (ends[i * 2] > 0)
1027 current.deleteChars(0, ends[i * 2]);
1029 ends[i * 2 + 1] -= ends[i * 2];
1030 extent -= ends[i * 2];
// Trim anything beyond maxLength.
1032 if (extent > maxLength)
1034 current.deleteChars(maxLength + 1, extent);
// Pad the end with gap characters up to maxLength.
1039 if (extent < maxLength)
1041 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1050 public HiddenSequences getHiddenSequences()
1052 return hiddenSequences;
1055 public CigarArray getCompactAlignment()
1057 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1058 for (int i = 0; i < sequences.size(); i++)
1060 alseqs[i] = new SeqCigar((SequenceI) sequences.elementAt(i));
1062 CigarArray cal = new CigarArray(alseqs);
1063 cal.addOperation(CigarArray.M, getWidth());
1067 public void setProperty(Object key, Object value)
1069 if (alignmentProperties == null)
1070 alignmentProperties = new Hashtable();
1072 alignmentProperties.put(key, value);
1075 public Object getProperty(Object key)
1077 if (alignmentProperties != null)
1078 return alignmentProperties.get(key);
1083 public Hashtable getProperties()
1085 return alignmentProperties;
// Codon frames registered on this alignment; null until the first
// addCodonFrame call, after which it is replaced by a longer copy on each
// addition.
1088 AlignedCodonFrame[] codonFrameList = null;
1094 * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame
1097 public void addCodonFrame(AlignedCodonFrame codons)
1101 if (codonFrameList == null)
1103 codonFrameList = new AlignedCodonFrame[]
1107 AlignedCodonFrame[] t = new AlignedCodonFrame[codonFrameList.length + 1];
1108 System.arraycopy(codonFrameList, 0, t, 0, codonFrameList.length);
1109 t[codonFrameList.length] = codons;
1116 * @see jalview.datamodel.AlignmentI#getCodonFrame(int)
1118 public AlignedCodonFrame getCodonFrame(int index)
1120 return codonFrameList[index];
1127 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1129 public AlignedCodonFrame[] getCodonFrame(SequenceI seq)
1131 if (seq == null || codonFrameList == null)
1133 Vector cframes = new Vector();
1134 for (int f = 0; f < codonFrameList.length; f++)
1136 if (codonFrameList[f].involvesSequence(seq))
1137 cframes.addElement(codonFrameList[f]);
1139 if (cframes.size() == 0)
1141 AlignedCodonFrame[] cfr = new AlignedCodonFrame[cframes.size()];
1142 cframes.copyInto(cfr);
1149 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1151 public AlignedCodonFrame[] getCodonFrames()
1153 return codonFrameList;
// Removes a codon frame (matched by identity) from codonFrameList.
// NOTE(review): the loop header, several statements and the tail of the
// arraycopy call are missing from this listing; comments cover only the
// visible statements. Whether the array is shrunk after removal is not
// visible here - confirm against the full source.
1159 * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel.
1160 * AlignedCodonFrame)
1162 public boolean removeCodonFrame(AlignedCodonFrame codons)
// Nothing can be removed without a frame to look for or a list to search.
1164 if (codons == null || codonFrameList == null)
1166 boolean removed = false;
// i is the scan position; iSize starts as the list length.
1167 int i = 0, iSize = codonFrameList.length;
1170 if (codonFrameList[i] == codons)
// Close the gap by shifting the following entries down one position.
1175 System.arraycopy(codonFrameList, i + 1, codonFrameList, i, iSize
1188 public void append(AlignmentI toappend)
1190 // TODO test this method for a future 2.5 release
1191 // currently tested for use in jalview.gui.SequenceFetcher
1192 boolean samegap = toappend.getGapCharacter() == getGapCharacter();
1193 char oldc = toappend.getGapCharacter();
1194 boolean hashidden = toappend.getHiddenSequences() != null
1195 && toappend.getHiddenSequences().hiddenSequences != null;
1196 // get all sequences including any hidden ones
1197 Vector sqs = (hashidden) ? toappend.getHiddenSequences()
1198 .getFullAlignment().getSequences() : toappend.getSequences();
1201 Enumeration sq = sqs.elements();
1202 while (sq.hasMoreElements())
1204 SequenceI addedsq = (SequenceI) sq.nextElement();
1207 char[] oldseq = addedsq.getSequence();
1208 for (int c = 0; c < oldseq.length; c++)
1210 if (oldseq[c] == oldc)
1212 oldseq[c] = gapCharacter;
1216 addSequence(addedsq);
1219 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1220 for (int a = 0; alan != null && a < alan.length; a++)
1222 addAnnotation(alan[a]);
1224 AlignedCodonFrame[] acod = toappend.getCodonFrames();
1225 for (int a = 0; acod != null && a < acod.length; a++)
1227 this.addCodonFrame(acod[a]);
1229 List<SequenceGroup> sg = toappend.getGroups();
1232 for (SequenceGroup _sg:sg)
1237 if (toappend.getHiddenSequences() != null)
1239 HiddenSequences hs = toappend.getHiddenSequences();
1240 if (hiddenSequences == null)
1242 hiddenSequences = new HiddenSequences(this);
1244 if (hs.hiddenSequences != null)
1246 for (int s = 0; s < hs.hiddenSequences.length; s++)
1248 // hide the newly appended sequence in the alignment
1249 if (hs.hiddenSequences[s] != null)
1251 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1256 if (toappend.getProperties() != null)
1258 // we really can't do very much here - just try to concatenate strings
1259 // where property collisions occur.
1260 Enumeration key = toappend.getProperties().keys();
1261 while (key.hasMoreElements())
1263 Object k = key.nextElement();
1264 Object ourval = this.getProperty(k);
1265 Object toapprop = toappend.getProperty(k);
1268 if (ourval.getClass().equals(toapprop.getClass())
1269 && !ourval.equals(toapprop))
1271 if (ourval instanceof String)
1274 this.setProperty(k, ((String) ourval) + "; "
1275 + ((String) toapprop));
1279 if (ourval instanceof Vector)
1282 Enumeration theirv = ((Vector) toapprop).elements();
1283 while (theirv.hasMoreElements())
1285 ((Vector) ourval).addElement(theirv);
1293 // just add new property directly
1294 setProperty(k, toapprop);
1302 public AlignmentAnnotation findOrCreateAnnotation(String name, boolean autoCalc,
1303 SequenceI seqRef, SequenceGroup groupRef)
1305 for (AlignmentAnnotation annot :
1306 getAlignmentAnnotation())
1308 if (annot.autoCalculated == autoCalc
1309 && annot.getCalcId().equals(name)
1310 && annot.sequenceRef == seqRef && annot.groupRef == groupRef)
1315 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1316 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1317 annot.hasText = false;
1318 annot.setCalcId(new String(name));
1319 annot.autoCalculated = autoCalc;
1322 annot.setSequenceRef(seqRef);
1324 annot.groupRef = groupRef;
1325 addAnnotation(annot);
1331 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1333 ArrayList<AlignmentAnnotation> aa=new ArrayList<AlignmentAnnotation>();
1334 for (AlignmentAnnotation a:getAlignmentAnnotation())
1336 if (a.getCalcId()==calcId || (a.getCalcId()!=null && calcId!=null && a.getCalcId().equals(calcId)))