2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
25 import jalview.io.FastaFile;
26 import jalview.util.Comparison;
27 import jalview.util.LinkedIdentityHashSet;
28 import jalview.util.MessageManager;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.BitSet;
33 import java.util.Collections;
34 import java.util.Enumeration;
35 import java.util.HashSet;
36 import java.util.Hashtable;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.Vector;
44 * Data structure to hold and manipulate a multiple sequence alignment
50 public class Alignment implements AlignmentI, AutoCloseable
52 private Alignment dataset;
54 private List<SequenceI> sequences;
56 protected List<SequenceGroup> groups;
58 protected char gapCharacter = '-';
60 private boolean nucleotide = true;
62 public boolean hasRNAStructure = false;
64 public AlignmentAnnotation[] annotations;
66 HiddenSequences hiddenSequences;
68 HiddenColumns hiddenCols;
70 public Hashtable alignmentProperties;
72 private List<AlignedCodonFrame> codonFrameList;
74 private void initAlignment(SequenceI[] seqs)
76 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
77 hiddenSequences = new HiddenSequences(this);
78 hiddenCols = new HiddenColumns();
79 codonFrameList = new ArrayList<>();
81 nucleotide = Comparison.isNucleotide(seqs);
83 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
85 for (int i = 0; i < seqs.length; i++)
87 sequences.add(seqs[i]);
93 * Make a 'copy' alignment - sequences have new copies of features and
94 * annotations, but share the original dataset sequences.
96 public Alignment(AlignmentI al)
98 SequenceI[] seqs = al.getSequencesArray();
99 for (int i = 0; i < seqs.length; i++)
101 seqs[i] = new Sequence(seqs[i]);
107 * Share the same dataset sequence mappings (if any).
109 if (dataset == null && al.getDataset() == null)
111 this.setCodonFrames(al.getCodonFrames());
116 * Make an alignment from an array of Sequences.
120 public Alignment(SequenceI[] seqs)
126 * Make a new alignment from an array of SeqCigars
131 public Alignment(SeqCigar[] alseqs)
133 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
134 gapCharacter, new HiddenColumns(), null);
139 * Make a new alignment from a CigarArray. JBPNote - can only do this when
140 * compactAlignment does not contain hidden regions. JBPNote - must also check
141 * that compactAlignment resolves to a set of SeqCigars - or construct them
144 * @param compactAlignment
147 public static AlignmentI createAlignment(CigarArray compactAlignment)
149 throw new Error(MessageManager
150 .getString("error.alignment_cigararray_not_implemented"));
151 // this(compactAlignment.refCigars);
155 public List<SequenceI> getSequences()
161 public List<SequenceI> getSequences(
162 Map<SequenceI, SequenceCollectionI> hiddenReps)
164 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
170 public SequenceI[] getSequencesArray()
172 if (sequences == null)
176 synchronized (sequences)
178 return sequences.toArray(new SequenceI[sequences.size()]);
183 * Returns a map of lists of sequences keyed by sequence name.
188 public Map<String, List<SequenceI>> getSequencesByName()
190 return AlignmentUtils.getSequencesByName(this);
194 public SequenceI getSequenceAt(int i)
196 synchronized (sequences)
199 if (i > -1 && i < sequences.size())
201 return sequences.get(i);
209 public SequenceI getSequenceAtAbsoluteIndex(int i)
211 SequenceI seq = null;
212 if (getHiddenSequences().getSize() > 0)
214 seq = getHiddenSequences().getHiddenSequence(i);
217 // didn't find the sequence in the hidden sequences, get it from the
219 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
220 seq = getSequenceAt(index);
225 seq = getSequenceAt(i);
231 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
232 * this currently does not recalculate whether or not the alignment is
233 * nucleotide, so mixed alignments may have undefined behaviour.
238 public void addSequence(SequenceI snew)
243 // maintain dataset integrity
244 SequenceI dsseq = snew.getDatasetSequence();
247 // derive new sequence
248 SequenceI adding = snew.deriveSequence();
250 dsseq = snew.getDatasetSequence();
252 if (getDataset().findIndex(dsseq) == -1)
254 getDataset().addSequence(dsseq);
258 if (sequences == null)
260 initAlignment(new SequenceI[] { snew });
264 synchronized (sequences)
269 if (hiddenSequences != null)
271 hiddenSequences.adjustHeightSequenceAdded();
276 public SequenceI replaceSequenceAt(int i, SequenceI snew)
278 synchronized (sequences)
280 if (sequences.size() > i)
282 return sequences.set(i, snew);
288 hiddenSequences.adjustHeightSequenceAdded();
295 * Inserts a sequence at a point in the alignment.
298 * the index of the position the sequence is to be inserted in.
301 public void insertSequenceAt(int i, SequenceI snew)
303 synchronized (sequences)
305 if (sequences.size() > i)
307 sequences.add(i, snew);
314 hiddenSequences.adjustHeightSequenceAdded();
323 * @return DOCUMENT ME!
326 public List<SequenceGroup> getGroups()
334 if (getDataset() != null)
338 getDataset().removeAlignmentRef();
339 } catch (Throwable e)
349 * Defensively nulls out references in case this object is not garbage
352 void nullReferences()
358 hiddenSequences = null;
362 * decrement the alignmentRefs counter by one and null references if it goes
367 private void removeAlignmentRef() throws Throwable
369 if (--alignmentRefs == 0)
376 public void deleteSequence(SequenceI s)
378 synchronized (sequences)
380 deleteSequence(findIndex(s));
385 public void deleteSequence(int i)
387 synchronized (sequences)
389 if (i > -1 && i < getHeight())
392 hiddenSequences.adjustHeightSequenceDeleted(i);
398 public void deleteHiddenSequence(int i)
400 synchronized (sequences)
402 if (i > -1 && i < getHeight())
412 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
415 public SequenceGroup findGroup(SequenceI seq, int position)
417 synchronized (groups)
419 for (SequenceGroup sg : groups)
421 if (sg.getSequences(null).contains(seq))
423 if (position >= sg.getStartRes() && position <= sg.getEndRes())
437 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
440 public SequenceGroup[] findAllGroups(SequenceI s)
442 ArrayList<SequenceGroup> temp = new ArrayList<>();
444 synchronized (groups)
446 int gSize = groups.size();
447 for (int i = 0; i < gSize; i++)
449 SequenceGroup sg = groups.get(i);
450 if (sg == null || sg.getSequences() == null)
452 this.deleteGroup(sg);
457 if (sg.getSequences().contains(s))
463 SequenceGroup[] ret = new SequenceGroup[temp.size()];
464 return temp.toArray(ret);
469 public void addGroup(SequenceGroup sg)
471 synchronized (groups)
473 if (!groups.contains(sg))
475 if (hiddenSequences.getSize() > 0)
477 int i, iSize = sg.getSize();
478 for (i = 0; i < iSize; i++)
480 if (!sequences.contains(sg.getSequenceAt(i)))
482 sg.deleteSequence(sg.getSequenceAt(i), false);
488 if (sg.getSize() < 1)
493 sg.setContext(this, true);
500 * remove any annotation that references gp
503 * (if null, removes all group associated annotation)
505 private void removeAnnotationForGroup(SequenceGroup gp)
507 if (annotations == null || annotations.length == 0)
511 // remove annotation very quickly
512 AlignmentAnnotation[] t,
513 todelete = new AlignmentAnnotation[annotations.length],
514 tokeep = new AlignmentAnnotation[annotations.length];
518 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
520 if (annotations[i].groupRef != null)
522 todelete[p++] = annotations[i];
526 tokeep[k++] = annotations[i];
532 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
534 if (annotations[i].groupRef == gp)
536 todelete[p++] = annotations[i];
540 tokeep[k++] = annotations[i];
546 // clear out the group associated annotation.
547 for (i = 0; i < p; i++)
549 unhookAnnotation(todelete[i]);
552 t = new AlignmentAnnotation[k];
553 for (i = 0; i < k; i++)
562 public void deleteAllGroups()
564 synchronized (groups)
566 if (annotations != null)
568 removeAnnotationForGroup(null);
570 for (SequenceGroup sg : groups)
572 sg.setContext(null, false);
580 public void deleteGroup(SequenceGroup g)
582 synchronized (groups)
584 if (groups.contains(g))
586 removeAnnotationForGroup(g);
588 g.setContext(null, false);
595 public SequenceI findName(String name)
597 return findName(name, false);
603 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
606 public SequenceI findName(String token, boolean b)
608 return findName(null, token, b);
614 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
618 public SequenceI findName(SequenceI startAfter, String token, boolean b)
623 String sqname = null;
624 int nseq = sequences.size();
625 if (startAfter != null)
627 // try to find the sequence in the alignment
628 boolean matched = false;
631 if (getSequenceAt(i++) == startAfter)
644 sq = getSequenceAt(i);
645 sqname = sq.getName();
646 if (sqname.equals(token) // exact match
647 || (b && // allow imperfect matches - case varies
648 (sqname.equalsIgnoreCase(token))))
650 return getSequenceAt(i);
660 public SequenceI[] findSequenceMatch(String name)
662 Vector matches = new Vector();
665 while (i < sequences.size())
667 if (getSequenceAt(i).getName().equals(name))
669 matches.addElement(getSequenceAt(i));
674 SequenceI[] result = new SequenceI[matches.size()];
675 for (i = 0; i < result.length; i++)
677 result[i] = (SequenceI) matches.elementAt(i);
687 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
690 public int findIndex(SequenceI s)
694 while (i < sequences.size())
696 if (s == getSequenceAt(i))
711 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
714 public int findIndex(SearchResultsI results)
718 while (i < sequences.size())
720 if (results.involvesSequence(getSequenceAt(i)))
730 public int getHeight()
732 return sequences.size();
736 public int getAbsoluteHeight()
738 return sequences.size() + getHiddenSequences().getSize();
742 public int getWidth()
746 for (int i = 0; i < sequences.size(); i++)
748 maxLength = Math.max(maxLength, getSequenceAt(i).getLength());
754 public int getVisibleWidth()
757 if (hiddenCols != null)
759 w -= hiddenCols.getSize();
771 public void setGapCharacter(char gc)
774 synchronized (sequences)
776 for (SequenceI seq : sequences)
778 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
779 .replace('-', gc).replace(' ', gc));
787 * @return DOCUMENT ME!
790 public char getGapCharacter()
798 * @see jalview.datamodel.AlignmentI#isAligned()
801 public boolean isAligned()
803 return isAligned(false);
809 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
812 public boolean isAligned(boolean includeHidden)
814 int width = getWidth();
815 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
817 includeHidden = true; // no hidden sequences to check against.
819 for (int i = 0; i < sequences.size(); i++)
821 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
823 if (getSequenceAt(i).getLength() != width)
834 public boolean isHidden(int alignmentIndex)
836 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
840 * Delete all annotations, including auto-calculated if the flag is set true.
841 * Returns true if at least one annotation was deleted, else false.
843 * @param includingAutoCalculated
847 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
849 boolean result = false;
850 for (AlignmentAnnotation alan : getAlignmentAnnotation())
852 if (!alan.autoCalculated || includingAutoCalculated)
854 deleteAnnotation(alan);
864 * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
865 * AlignmentAnnotation)
868 public boolean deleteAnnotation(AlignmentAnnotation aa)
870 return deleteAnnotation(aa, true);
874 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
878 if (annotations != null)
880 aSize = annotations.length;
888 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
890 boolean swap = false;
893 for (int i = 0; i < aSize; i++)
895 if (annotations[i] == aa)
900 if (tIndex < temp.length)
902 temp[tIndex++] = annotations[i];
911 unhookAnnotation(aa);
918 * remove any object references associated with this annotation
922 private void unhookAnnotation(AlignmentAnnotation aa)
924 if (aa.sequenceRef != null)
926 aa.sequenceRef.removeAlignmentAnnotation(aa);
928 if (aa.groupRef != null)
930 // probably need to do more here in the future (post 2.5.0)
938 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
939 * AlignmentAnnotation)
942 public void addAnnotation(AlignmentAnnotation aa)
944 addAnnotation(aa, -1);
950 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
951 * AlignmentAnnotation, int)
954 public void addAnnotation(AlignmentAnnotation aa, int pos)
956 if (aa.getRNAStruc() != null)
958 hasRNAStructure = true;
962 if (annotations != null)
964 aSize = annotations.length + 1;
967 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
969 if (pos == -1 || pos >= aSize)
971 temp[aSize - 1] = aa;
980 for (i = 0; i < (aSize - 1); i++, p++)
988 temp[p] = annotations[i];
997 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
999 if (aa == null || annotations == null || annotations.length - 1 < index)
1004 int aSize = annotations.length;
1005 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
1009 for (int i = 0; i < aSize; i++)
1018 temp[i] = annotations[i];
1022 temp[i] = annotations[i - 1];
1031 * returns all annotation on the alignment
1033 public AlignmentAnnotation[] getAlignmentAnnotation()
1039 public boolean isNucleotide()
1045 public boolean hasRNAStructure()
1047 // TODO can it happen that structure is removed from alignment?
1048 return hasRNAStructure;
1052 public void setDataset(AlignmentI data)
1054 if (dataset == null && data == null)
1056 createDatasetAlignment();
1058 else if (dataset == null && data != null)
1062 throw new IllegalArgumentException("Circular dataset reference");
1064 if (!(data instanceof Alignment))
1067 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1069 dataset = (Alignment) data;
1070 for (int i = 0; i < getHeight(); i++)
1072 SequenceI currentSeq = getSequenceAt(i);
1073 SequenceI dsq = currentSeq.getDatasetSequence();
1076 dsq = currentSeq.createDatasetSequence();
1077 dataset.addSequence(dsq);
1081 while (dsq.getDatasetSequence() != null)
1083 dsq = dsq.getDatasetSequence();
1085 if (dataset.findIndex(dsq) == -1)
1087 dataset.addSequence(dsq);
1092 dataset.addAlignmentRef();
1096 * add dataset sequences to seq for currentSeq and any sequences it references
1098 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1099 Set<SequenceI> seqs, boolean createDatasetSequence)
1101 SequenceI alignedSeq = currentSeq;
1102 if (currentSeq.getDatasetSequence() != null)
1104 currentSeq = currentSeq.getDatasetSequence();
1108 if (createDatasetSequence)
1110 currentSeq = currentSeq.createDatasetSequence();
1114 List<SequenceI> toProcess = new ArrayList<>();
1115 toProcess.add(currentSeq);
1116 while (toProcess.size() > 0)
1119 SequenceI curDs = toProcess.remove(0);
1121 if (!seqs.add(curDs))
1125 // iterate over database references, making sure we add forward referenced
1127 if (curDs.getDBRefs() != null)
1129 for (DBRefEntry dbr : curDs.getDBRefs())
1131 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1133 if (dbr.getMap().getTo() == alignedSeq)
1136 * update mapping to be to the newly created dataset sequence
1138 dbr.getMap().setTo(currentSeq);
1140 if (dbr.getMap().getTo().getDatasetSequence() != null)
1142 throw new Error("Implementation error: Map.getTo() for dbref "
1143 + dbr + " from " + curDs.getName()
1144 + " is not a dataset sequence.");
1146 // we recurse to add all forward references to dataset sequences via
1148 toProcess.add(dbr.getMap().getTo());
1156 * Creates a new dataset for this alignment. Can only be done once - if
1157 * dataset is not null this will not be performed.
1159 public void createDatasetAlignment()
1161 if (dataset != null)
1165 // try to avoid using SequenceI.equals at this stage, it will be expensive
1166 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1168 for (int i = 0; i < getHeight(); i++)
1170 SequenceI currentSeq = getSequenceAt(i);
1171 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1174 // verify all mappings are in dataset
1175 for (AlignedCodonFrame cf : codonFrameList)
1177 for (SequenceToSequenceMapping ssm : cf.getMappings())
1179 if (!seqs.contains(ssm.getFromSeq()))
1181 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1183 if (!seqs.contains(ssm.getMapping().getTo()))
1185 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1189 // finally construct dataset
1190 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1191 // move mappings to the dataset alignment
1192 dataset.codonFrameList = this.codonFrameList;
1193 this.codonFrameList = null;
1197 * reference count for number of alignments referencing this one.
1199 int alignmentRefs = 0;
1202 * increase reference count to this alignment.
1204 private void addAlignmentRef()
1210 public Alignment getDataset()
1216 public boolean padGaps()
1218 boolean modified = false;
1220 // Remove excess gaps from the end of alignment
1224 int nseq = sequences.size();
1225 for (int i = 0; i < nseq; i++)
1227 current = getSequenceAt(i);
1228 for (int j = current.getLength(); j > maxLength; j--)
1231 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
1242 for (int i = 0; i < nseq; i++)
1244 current = getSequenceAt(i);
1245 cLength = current.getLength();
1247 if (cLength < maxLength)
1249 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
1252 else if (current.getLength() > maxLength)
1254 current.deleteChars(maxLength, current.getLength());
1261 * Justify the sequences to the left or right by deleting and inserting gaps
1262 * before the initial residue or after the terminal residue
1265 * true if alignment padded to right, false to justify to left
1266 * @return true if alignment was changed
1269 public boolean justify(boolean right)
1271 boolean modified = false;
1273 // Remove excess gaps from the end of alignment
1275 int ends[] = new int[sequences.size() * 2];
1277 for (int i = 0; i < sequences.size(); i++)
1279 current = getSequenceAt(i);
1280 // This should really be a sequence method
1281 ends[i * 2] = current.findIndex(current.getStart());
1282 ends[i * 2 + 1] = current
1283 .findIndex(current.getStart() + current.getLength());
1284 boolean hitres = false;
1285 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1287 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1296 ends[i * 2 + 1] = j;
1297 if (j - ends[i * 2] > maxLength)
1299 maxLength = j - ends[i * 2];
1307 // now edit the flanking gaps to justify to either left or right
1308 int cLength, extent, diff;
1309 for (int i = 0; i < sequences.size(); i++)
1311 current = getSequenceAt(i);
1313 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1314 diff = maxLength - cLength; // number of gaps to indent
1315 extent = current.getLength();
1319 if (extent > ends[i * 2 + 1])
1321 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1324 if (ends[i * 2] > diff)
1326 current.deleteChars(0, ends[i * 2] - diff);
1331 if (ends[i * 2] < diff)
1333 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1341 if (ends[i * 2] > 0)
1343 current.deleteChars(0, ends[i * 2]);
1345 ends[i * 2 + 1] -= ends[i * 2];
1346 extent -= ends[i * 2];
1348 if (extent > maxLength)
1350 current.deleteChars(maxLength + 1, extent);
1355 if (extent < maxLength)
1357 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1367 public HiddenSequences getHiddenSequences()
1369 return hiddenSequences;
1373 public HiddenColumns getHiddenColumns()
1379 public CigarArray getCompactAlignment()
1381 synchronized (sequences)
1383 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1385 for (SequenceI seq : sequences)
1387 alseqs[i++] = new SeqCigar(seq);
1389 CigarArray cal = new CigarArray(alseqs);
1390 cal.addOperation(CigarArray.M, getWidth());
1396 public void setProperty(Object key, Object value)
1398 if (alignmentProperties == null)
1400 alignmentProperties = new Hashtable();
1403 alignmentProperties.put(key, value);
1407 public Object getProperty(Object key)
1409 if (alignmentProperties != null)
1411 return alignmentProperties.get(key);
1420 public Hashtable getProperties()
1422 return alignmentProperties;
1426 * Adds the given mapping to the stored set. Note this may be held on the
1427 * dataset alignment.
1430 public void addCodonFrame(AlignedCodonFrame codons)
1432 List<AlignedCodonFrame> acfs = getCodonFrames();
1433 if (codons != null && acfs != null && !acfs.contains(codons))
1443 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1446 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1452 List<AlignedCodonFrame> cframes = new ArrayList<>();
1453 for (AlignedCodonFrame acf : getCodonFrames())
1455 if (acf.involvesSequence(seq))
1464 * Sets the codon frame mappings (replacing any existing mappings). Note the
1465 * mappings are set on the dataset alignment instead if there is one.
1467 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1470 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1472 if (dataset != null)
1474 dataset.setCodonFrames(acfs);
1478 this.codonFrameList = acfs;
1483 * Returns the set of codon frame mappings. Any changes to the returned set
1484 * will affect the alignment. The mappings are held on (and read from) the
1485 * dataset alignment if there is one.
1487 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1490 public List<AlignedCodonFrame> getCodonFrames()
1492 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1493 // this behaviour is currently incorrect. method should return codon frames
1494 // for just the alignment,
1495 // selected from dataset
1496 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1500 * Removes the given mapping from the stored set. Note that the mappings are
1501 * held on the dataset alignment if there is one.
1504 public boolean removeCodonFrame(AlignedCodonFrame codons)
1506 List<AlignedCodonFrame> acfs = getCodonFrames();
1507 if (codons == null || acfs == null)
1511 return acfs.remove(codons);
1515 public void append(AlignmentI toappend)
1517 // TODO JAL-1270 needs test coverage
1518 // currently tested for use in jalview.gui.SequenceFetcher
1519 char oldc = toappend.getGapCharacter();
1520 boolean samegap = oldc == getGapCharacter();
1521 boolean hashidden = toappend.getHiddenSequences() != null
1522 && toappend.getHiddenSequences().hiddenSequences != null;
1523 // get all sequences including any hidden ones
1524 List<SequenceI> sqs = (hashidden)
1525 ? toappend.getHiddenSequences().getFullAlignment()
1527 : toappend.getSequences();
1530 // avoid self append deadlock by
1531 List<SequenceI> toappendsq = new ArrayList<>();
1534 for (SequenceI addedsq : sqs)
1538 addedsq.replace(oldc, gapCharacter);
1540 toappendsq.add(addedsq);
1543 for (SequenceI addedsq : toappendsq)
1545 addSequence(addedsq);
1548 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1549 for (int a = 0; alan != null && a < alan.length; a++)
1551 addAnnotation(alan[a]);
1555 getCodonFrames().addAll(toappend.getCodonFrames());
1557 List<SequenceGroup> sg = toappend.getGroups();
1560 for (SequenceGroup _sg : sg)
1565 if (toappend.getHiddenSequences() != null)
1567 HiddenSequences hs = toappend.getHiddenSequences();
1568 if (hiddenSequences == null)
1570 hiddenSequences = new HiddenSequences(this);
1572 if (hs.hiddenSequences != null)
1574 for (int s = 0; s < hs.hiddenSequences.length; s++)
1576 // hide the newly appended sequence in the alignment
1577 if (hs.hiddenSequences[s] != null)
1579 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1584 if (toappend.getProperties() != null)
1586 // we really can't do very much here - just try to concatenate strings
1587 // where property collisions occur.
1588 Enumeration key = toappend.getProperties().keys();
1589 while (key.hasMoreElements())
1591 Object k = key.nextElement();
1592 Object ourval = this.getProperty(k);
1593 Object toapprop = toappend.getProperty(k);
1596 if (ourval.getClass().equals(toapprop.getClass())
1597 && !ourval.equals(toapprop))
1599 if (ourval instanceof String)
1603 ((String) ourval) + "; " + ((String) toapprop));
1607 if (ourval instanceof Vector)
1610 Enumeration theirv = ((Vector) toapprop).elements();
1611 while (theirv.hasMoreElements())
1613 ((Vector) ourval).addElement(theirv);
1621 // just add new property directly
1622 setProperty(k, toapprop);
1630 public AlignmentAnnotation findOrCreateAnnotation(String name,
1631 String calcId, boolean autoCalc, SequenceI seqRef,
1632 SequenceGroup groupRef)
1634 AlignmentAnnotation annot = annotations == null ? null
1635 : AlignmentAnnotation.findFirstAnnotation(
1636 Arrays.asList(getAlignmentAnnotation()), name, calcId,
1637 autoCalc, seqRef, groupRef);
1642 annot = new AlignmentAnnotation(name, name, new Annotation[1], 0f, 0f,
1643 AlignmentAnnotation.BAR_GRAPH);
1644 annot.hasText = false;
1647 annot.setCalcId(calcId);
1649 annot.autoCalculated = autoCalc;
1652 annot.setSequenceRef(seqRef);
1654 annot.groupRef = groupRef;
1655 addAnnotation(annot);
1662 public AlignmentAnnotation updateFromOrCopyAnnotation(
1663 AlignmentAnnotation ala)
1665 AlignmentAnnotation annot = AlignmentAnnotation.findFirstAnnotation(
1666 Arrays.asList(getAlignmentAnnotation()), ala.label, ala.calcId,
1667 ala.autoCalculated, ala.sequenceRef, ala.groupRef);
1670 annot = new AlignmentAnnotation(ala);
1671 addAnnotation(annot);
1675 annot.updateAlignmentAnnotationFrom(ala);
1677 validateAnnotation(annot);
1682 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1684 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1685 if (alignmentAnnotation != null)
1687 return AlignmentAnnotation.findAnnotation(
1688 Arrays.asList(getAlignmentAnnotation()), calcId);
1690 return Arrays.asList(new AlignmentAnnotation[] {});
1694 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1695 String calcId, String label)
1697 return AlignmentAnnotation.findAnnotations(
1698 Arrays.asList(getAlignmentAnnotation()), seq, calcId, label);
1702 public void moveSelectedSequencesByOne(SequenceGroup sg,
1703 Map<SequenceI, SequenceCollectionI> map, boolean up)
1705 synchronized (sequences)
1710 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1712 SequenceI seq = sequences.get(i);
1713 if (!sg.getSequences(map).contains(seq))
1718 SequenceI temp = sequences.get(i - 1);
1719 if (sg.getSequences(null).contains(temp))
1724 sequences.set(i, temp);
1725 sequences.set(i - 1, seq);
1730 for (int i = sequences.size() - 2; i > -1; i--)
1732 SequenceI seq = sequences.get(i);
1733 if (!sg.getSequences(map).contains(seq))
1738 SequenceI temp = sequences.get(i + 1);
1739 if (sg.getSequences(map).contains(temp))
1744 sequences.set(i, temp);
1745 sequences.set(i + 1, seq);
1753 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1755 alignmentAnnotation.validateRangeAndDisplay();
1756 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1758 hasRNAStructure = true;
1762 private SequenceI seqrep = null;
1766 * @return the representative sequence for this group
1769 public SequenceI getSeqrep()
1775 * set the representative sequence for this group. Note - this affects the
1776 * interpretation of the Hidereps attribute.
1779 * the seqrep to set (null means no sequence representative)
1782 public void setSeqrep(SequenceI seqrep)
1784 this.seqrep = seqrep;
1789 * @return true if group has a sequence representative
1792 public boolean hasSeqrep()
1794 return seqrep != null;
1798 public int getEndRes()
1800 return getWidth() - 1;
1804 public int getStartRes()
1810 * In the case of AlignmentI - returns the dataset for the alignment, if set
1813 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
1816 public AnnotatedCollectionI getContext()
1822 * Align this alignment like the given (mapped) one.
1825 public int alignAs(AlignmentI al)
1828 * Currently retains unmapped gaps (in introns), regaps mapped regions
1831 return alignAs(al, false, true);
1835 * Align this alignment 'the same as' the given one. Mapped sequences only are
1836 * realigned. If both of the same type (nucleotide/protein) then align both
1837 * identically. If this is nucleotide and the other is protein, make 3 gaps
1838 * for each gap in the protein sequences. If this is protein and the other is
1839 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1840 * nucleotide bases. If this is protein and the other is nucleotide, gaps
1841 * protein to match the relative ordering of codons in the nucleotide.
1843 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1844 * regions are preserved. Gaps that connect introns to exons are treated
1845 * conservatively, i.e. only preserved if both intron and exon gaps are
1846 * preserved. TODO: check caveats below where the implementation fails
1849 * - must have same dataset, and sequences in al must have equivalent
1850 * dataset sequence and start/end bounds under given mapping
1851 * @param preserveMappedGaps
1852 * if true, gaps within and between mapped codons are preserved
1853 * @param preserveUnmappedGaps
1854 * if true, gaps within and between unmapped codons are preserved
1857 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1858 boolean preserveUnmappedGaps)
1860 // TODO should this method signature be the one in the interface?
1861 // JBPComment - yes - neither flag is used, so should be deleted.
1862 boolean thisIsNucleotide = this.isNucleotide();
1863 boolean thatIsProtein = !al.isNucleotide();
1864 if (!thatIsProtein && !thisIsNucleotide)
1866 return AlignmentUtils.alignProteinAsDna(this, al);
1868 else if (thatIsProtein && thisIsNucleotide)
1870 return AlignmentUtils.alignCdsAsProtein(this, al);
1872 return AlignmentUtils.alignAs(this, al);
1876 * Returns the alignment in Fasta format. Behaviour of this method is not
1877 * guaranteed between versions.
1880 public String toString()
1882 return new FastaFile().print(getSequencesArray(), true);
1886 * Returns the set of distinct sequence names. No ordering is guaranteed.
1889 public Set<String> getSequenceNames()
1891 Set<String> names = new HashSet<>();
1892 for (SequenceI seq : getSequences())
1894 names.add(seq.getName());
1900 public boolean hasValidSequence()
1902 boolean hasValidSeq = false;
1903 for (SequenceI seq : getSequences())
1905 if ((seq.getEnd() - seq.getStart()) > 0)
1915 * Update any mappings to 'virtual' sequences to compatible real ones, if
1916 * present in the added sequences. Returns a count of mappings updated.
1922 public int realiseMappings(List<SequenceI> seqs)
1925 for (SequenceI seq : seqs)
1927 for (AlignedCodonFrame mapping : getCodonFrames())
1929 count += mapping.realiseWith(seq);
1936 * Returns the first AlignedCodonFrame that has a mapping between the given
1944 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1946 for (AlignedCodonFrame acf : getCodonFrames())
1948 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
1957 public boolean setHiddenColumns(HiddenColumns cols)
1959 boolean changed = cols == null ? hiddenCols != null
1960 : !cols.equals(hiddenCols);
1966 public void setupJPredAlignment()
1968 SequenceI repseq = getSequenceAt(0);
1970 HiddenColumns cs = new HiddenColumns();
1971 cs.hideList(repseq.getInsertions());
1972 setHiddenColumns(cs);
1976 public HiddenColumns propagateInsertions(SequenceI profileseq,
1977 AlignmentView input)
1981 char gc = getGapCharacter();
1982 Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc);
1983 HiddenColumns nview = (HiddenColumns) alandhidden[1];
1984 SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos];
1985 return propagateInsertions(profileseq, origseq, nview);
1991 * sequence in al which corresponds to origseq
1993 * alignment which is to have gaps inserted into it
1995 * sequence corresponding to profileseq which defines gap map for
1998 private HiddenColumns propagateInsertions(SequenceI profileseq,
1999 SequenceI origseq, HiddenColumns hc)
2001 // take the set of hidden columns, and the set of gaps in origseq,
2002 // and remove all the hidden gaps from hiddenColumns
2004 // first get the gaps as a Bitset
2005 // then calculate hidden ^ not(gap)
2006 BitSet gaps = origseq.gapBitset();
2009 // for each sequence in the alignment, except the profile sequence,
2010 // insert gaps corresponding to each hidden region but where each hidden
2011 // column region is shifted backwards by the number of preceding visible
2012 // gaps update hidden columns at the same time
2013 HiddenColumns newhidden = new HiddenColumns();
2015 int numGapsBefore = 0;
2016 int gapPosition = 0;
2017 Iterator<int[]> it = hc.iterator();
2018 while (it.hasNext())
2020 int[] region = it.next();
2022 // get region coordinates accounting for gaps
2023 // we can rely on gaps not being *in* hidden regions because we already
2025 while (gapPosition < region[0])
2028 if (gaps.get(gapPosition))
2034 int left = region[0] - numGapsBefore;
2035 int right = region[1] - numGapsBefore;
2037 newhidden.hideColumns(left, right);
2038 padGaps(left, right, profileseq);
2044 * Pad gaps in all sequences in alignment except profileseq
2047 * position of first gap to insert
2049 * position of last gap to insert
2051 * sequence not to pad
2053 private void padGaps(int left, int right, SequenceI profileseq)
2055 char gc = getGapCharacter();
2057 // make a string with number of gaps = length of hidden region
2058 StringBuilder sb = new StringBuilder();
2059 for (int g = 0; g < right - left + 1; g++)
2064 // loop over the sequences and pad with gaps where required
2065 for (int s = 0, ns = getHeight(); s < ns; s++)
2067 SequenceI sqobj = getSequenceAt(s);
2068 if ((sqobj != profileseq) && (sqobj.getLength() >= left))
2070 String sq = sqobj.getSequenceAsString();
2072 sq.substring(0, left) + sb.toString() + sq.substring(left));
2078 public List<SequenceI> getHmmSequences()
2080 List<SequenceI> result = new ArrayList<>();
2081 for (int i = 0; i < sequences.size(); i++)
2083 SequenceI seq = sequences.get(i);
2084 if (seq.hasHMMProfile())