2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.io.FastaFile;
25 import jalview.util.Comparison;
26 import jalview.util.LinkedIdentityHashSet;
27 import jalview.util.MessageManager;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.BitSet;
32 import java.util.Collections;
33 import java.util.Enumeration;
34 import java.util.HashSet;
35 import java.util.Hashtable;
36 import java.util.Iterator;
37 import java.util.List;
40 import java.util.Vector;
43 * Data structure to hold and manipulate a multiple sequence alignment
49 public class Alignment implements AlignmentI
51 private Alignment dataset;
53 private List<SequenceI> sequences;
55 protected List<SequenceGroup> groups;
57 protected char gapCharacter = '-';
59 private boolean nucleotide = true;
61 public boolean hasRNAStructure = false;
63 public AlignmentAnnotation[] annotations;
65 HiddenSequences hiddenSequences;
67 HiddenColumns hiddenCols;
69 public Hashtable alignmentProperties;
71 private List<AlignedCodonFrame> codonFrameList;
// Sets up this alignment's internal collections and fills it with the given sequences.
// seqs: the sequences to hold; also used to decide the nucleotide flag.
73 private void initAlignment(SequenceI[] seqs)
// Start with an empty synchronized group list and fresh hidden-sequence,
// hidden-column and codon-frame containers.
75 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
76 hiddenSequences = new HiddenSequences(this);
77 hiddenCols = new HiddenColumns();
78 codonFrameList = new ArrayList<>();
// The nucleotide flag is derived here from the supplied sequences.
80 nucleotide = Comparison.isNucleotide(seqs);
82 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
// Copy the array into the synchronized list, keeping the array order.
84 for (int i = 0; i < seqs.length; i++)
86 sequences.add(seqs[i]);
92 * Make a 'copy' alignment - sequences have new copies of features and
93 * annotations, but share the original dataset sequences.
95 public Alignment(AlignmentI al)
97 SequenceI[] seqs = al.getSequencesArray();
98 for (int i = 0; i < seqs.length; i++)
100 seqs[i] = new Sequence(seqs[i]);
106 * Share the same dataset sequence mappings (if any).
108 if (dataset == null && al.getDataset() == null)
110 this.setCodonFrames(al.getCodonFrames());
115 * Make an alignment from an array of Sequences.
119 public Alignment(SequenceI[] seqs)
125 * Make a new alignment from an array of SeqCigars
130 public Alignment(SeqCigar[] alseqs)
132 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
133 gapCharacter, new HiddenColumns(), null);
138 * Make a new alignment from a CigarArray JBPNote - can only do this when
139 * compactAlignment does not contain hidden regions. JBPNote - must also check
140 * that compactAlignment resolves to a set of SeqCigars - or construct them
143 * @param compactAlignment
// Factory for building an alignment from a CigarArray. The only statement
// visible here throws an Error whose message is looked up through
// MessageManager, so callers should treat this as not implemented.
146 public static AlignmentI createAlignment(CigarArray compactAlignment)
148 throw new Error(MessageManager
149 .getString("error.alignment_cigararray_not_implemented"));
// Disabled sketch of the intended implementation, kept by the original author:
150 // this(compactAlignment.refCigars);
154 public List<SequenceI> getSequences()
160 public List<SequenceI> getSequences(
161 Map<SequenceI, SequenceCollectionI> hiddenReps)
163 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
169 public SequenceI[] getSequencesArray()
171 if (sequences == null)
175 synchronized (sequences)
177 return sequences.toArray(new SequenceI[sequences.size()]);
182 * Returns a map of lists of sequences keyed by sequence name.
187 public Map<String, List<SequenceI>> getSequencesByName()
189 return AlignmentUtils.getSequencesByName(this);
193 public SequenceI getSequenceAt(int i)
195 synchronized (sequences)
197 if (i > -1 && i < sequences.size())
199 return sequences.get(i);
207 public SequenceI getSequenceAtAbsoluteIndex(int i)
209 SequenceI seq = null;
210 if (getHiddenSequences().getSize() > 0)
212 seq = getHiddenSequences().getHiddenSequence(i);
215 // didn't find the sequence in the hidden sequences, get it from the
217 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
218 seq = getSequenceAt(index);
223 seq = getSequenceAt(i);
229 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
230 * this currently does not recalculate whether or not the alignment is
231 * nucleotide, so mixed alignments may have undefined behaviour.
236 public void addSequence(SequenceI snew)
241 // maintain dataset integrity
242 SequenceI dsseq = snew.getDatasetSequence();
245 // derive new sequence
246 SequenceI adding = snew.deriveSequence();
248 dsseq = snew.getDatasetSequence();
250 if (getDataset().findIndex(dsseq) == -1)
252 getDataset().addSequence(dsseq);
256 if (sequences == null)
258 initAlignment(new SequenceI[] { snew });
262 synchronized (sequences)
267 if (hiddenSequences != null)
269 hiddenSequences.adjustHeightSequenceAdded();
274 public SequenceI replaceSequenceAt(int i, SequenceI snew)
276 synchronized (sequences)
278 if (sequences.size() > i)
280 return sequences.set(i, snew);
286 hiddenSequences.adjustHeightSequenceAdded();
295 * @return DOCUMENT ME!
298 public List<SequenceGroup> getGroups()
304 public void finalize() throws Throwable
306 if (getDataset() != null)
308 getDataset().removeAlignmentRef();
316 * Defensively nulls out references in case this object is not garbage
319 void nullReferences()
325 hiddenSequences = null;
329 * decrement the alignmentRefs counter by one and null references if it goes
334 private void removeAlignmentRef() throws Throwable
336 if (--alignmentRefs == 0)
343 public void deleteSequence(SequenceI s)
345 synchronized (sequences)
347 deleteSequence(findIndex(s));
352 public void deleteSequence(int i)
354 synchronized (sequences)
356 if (i > -1 && i < getHeight())
359 hiddenSequences.adjustHeightSequenceDeleted(i);
365 public void deleteHiddenSequence(int i)
367 synchronized (sequences)
369 if (i > -1 && i < getHeight())
379 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
382 public SequenceGroup findGroup(SequenceI seq, int position)
384 synchronized (groups)
386 for (SequenceGroup sg : groups)
388 if (sg.getSequences(null).contains(seq))
390 if (position >= sg.getStartRes() && position <= sg.getEndRes())
404 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
// Returns, as an array, the contents of 'temp'; the visible membership test
// selects groups whose sequence list contains s (the statement that adds to
// 'temp' is not visible in this view).
407 public SequenceGroup[] findAllGroups(SequenceI s)
409 ArrayList<SequenceGroup> temp = new ArrayList<>();
411 synchronized (groups)
// NOTE(review): the size is cached before the loop while deleteGroup(...) is
// called inside it. If that call shrinks 'groups', later groups.get(i) calls
// could skip an entry or index past the end - confirm against the full source.
413 int gSize = groups.size();
414 for (int i = 0; i < gSize; i++)
416 SequenceGroup sg = groups.get(i);
// A null group, or one with no sequence list, is passed to deleteGroup
// instead of being searched.
417 if (sg == null || sg.getSequences() == null)
419 this.deleteGroup(sg);
424 if (sg.getSequences().contains(s))
// Size the result array exactly so toArray fills and returns it.
430 SequenceGroup[] ret = new SequenceGroup[temp.size()];
431 return temp.toArray(ret);
436 public void addGroup(SequenceGroup sg)
438 synchronized (groups)
440 if (!groups.contains(sg))
442 if (hiddenSequences.getSize() > 0)
444 int i, iSize = sg.getSize();
445 for (i = 0; i < iSize; i++)
447 if (!sequences.contains(sg.getSequenceAt(i)))
449 sg.deleteSequence(sg.getSequenceAt(i), false);
455 if (sg.getSize() < 1)
460 sg.setContext(this, true);
467 * remove any annotation that references gp
470 * (if null, removes all group associated annotation)
472 private void removeAnnotationForGroup(SequenceGroup gp)
474 if (annotations == null || annotations.length == 0)
478 // remove annotation very quickly
479 AlignmentAnnotation[] t,
480 todelete = new AlignmentAnnotation[annotations.length],
481 tokeep = new AlignmentAnnotation[annotations.length];
485 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
487 if (annotations[i].groupRef != null)
489 todelete[p++] = annotations[i];
493 tokeep[k++] = annotations[i];
499 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
501 if (annotations[i].groupRef == gp)
503 todelete[p++] = annotations[i];
507 tokeep[k++] = annotations[i];
513 // clear out the group associated annotation.
514 for (i = 0; i < p; i++)
516 unhookAnnotation(todelete[i]);
519 t = new AlignmentAnnotation[k];
520 for (i = 0; i < k; i++)
529 public void deleteAllGroups()
531 synchronized (groups)
533 if (annotations != null)
535 removeAnnotationForGroup(null);
537 for (SequenceGroup sg : groups)
539 sg.setContext(null, false);
547 public void deleteGroup(SequenceGroup g)
549 synchronized (groups)
551 if (groups.contains(g))
553 removeAnnotationForGroup(g);
555 g.setContext(null, false);
562 public SequenceI findName(String name)
564 return findName(name, false);
570 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
573 public SequenceI findName(String token, boolean b)
575 return findName(null, token, b);
581 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
585 public SequenceI findName(SequenceI startAfter, String token, boolean b)
590 String sqname = null;
591 if (startAfter != null)
593 // try to find the sequence in the alignment
594 boolean matched = false;
595 while (i < sequences.size())
597 if (getSequenceAt(i++) == startAfter)
608 while (i < sequences.size())
610 sq = getSequenceAt(i);
611 sqname = sq.getName();
612 if (sqname.equals(token) // exact match
613 || (b && // allow imperfect matches - case varies
614 (sqname.equalsIgnoreCase(token))))
616 return getSequenceAt(i);
626 public SequenceI[] findSequenceMatch(String name)
628 Vector matches = new Vector();
631 while (i < sequences.size())
633 if (getSequenceAt(i).getName().equals(name))
635 matches.addElement(getSequenceAt(i));
640 SequenceI[] result = new SequenceI[matches.size()];
641 for (i = 0; i < result.length; i++)
643 result[i] = (SequenceI) matches.elementAt(i);
653 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
656 public int findIndex(SequenceI s)
660 while (i < sequences.size())
662 if (s == getSequenceAt(i))
677 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
680 public int findIndex(SearchResultsI results)
684 while (i < sequences.size())
686 if (results.involvesSequence(getSequenceAt(i)))
696 public int getHeight()
698 return sequences.size();
702 public int getAbsoluteHeight()
704 return sequences.size() + getHiddenSequences().getSize();
708 public int getWidth()
712 for (int i = 0; i < sequences.size(); i++)
714 if (getSequenceAt(i).getLength() > maxLength)
716 maxLength = getSequenceAt(i).getLength();
724 public int getWidth()
726 final Wrapper temp = new Wrapper();
728 forEachSequence(new Consumer<SequenceI>()
731 public void accept(SequenceI s)
733 if (s.getLength() > temp.inner)
735 temp.inner = s.getLength();
738 }, 0, sequences.size() - 1);
743 public static class Wrapper
// Rewrites every held sequence so that each '.', '-' and ' ' character becomes gc.
// gc: the character to substitute for those three characters.
755 public void setGapCharacter(char gc)
// Hold the list's lock for the whole pass over the sequences.
758 synchronized (sequences)
760 for (SequenceI seq : sequences)
// Three chained replace calls map each of the three characters to gc in turn.
762 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
763 .replace('-', gc).replace(' ', gc));
771 * @return DOCUMENT ME!
774 public char getGapCharacter()
782 * @see jalview.datamodel.AlignmentI#isAligned()
785 public boolean isAligned()
787 return isAligned(false);
793 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
796 public boolean isAligned(boolean includeHidden)
798 int width = getWidth();
799 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
801 includeHidden = true; // no hidden sequences to check against.
803 for (int i = 0; i < sequences.size(); i++)
805 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
807 if (getSequenceAt(i).getLength() != width)
818 public boolean isHidden(int alignmentIndex)
820 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
824 * Delete all annotations, including auto-calculated if the flag is set true.
825 * Returns true if at least one annotation was deleted, else false.
827 * @param includingAutoCalculated
831 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
833 boolean result = false;
834 for (AlignmentAnnotation alan : getAlignmentAnnotation())
836 if (!alan.autoCalculated || includingAutoCalculated)
838 deleteAnnotation(alan);
848 * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
849 * AlignmentAnnotation)
852 public boolean deleteAnnotation(AlignmentAnnotation aa)
854 return deleteAnnotation(aa, true);
858 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
862 if (annotations != null)
864 aSize = annotations.length;
872 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
874 boolean swap = false;
877 for (int i = 0; i < aSize; i++)
879 if (annotations[i] == aa)
884 if (tIndex < temp.length)
886 temp[tIndex++] = annotations[i];
895 unhookAnnotation(aa);
902 * remove any object references associated with this annotation
906 private void unhookAnnotation(AlignmentAnnotation aa)
908 if (aa.sequenceRef != null)
910 aa.sequenceRef.removeAlignmentAnnotation(aa);
912 if (aa.groupRef != null)
914 // probably need to do more here in the future (post 2.5.0)
922 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
923 * AlignmentAnnotation)
926 public void addAnnotation(AlignmentAnnotation aa)
928 addAnnotation(aa, -1);
934 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
935 * AlignmentAnnotation, int)
938 public void addAnnotation(AlignmentAnnotation aa, int pos)
940 if (aa.getRNAStruc() != null)
942 hasRNAStructure = true;
946 if (annotations != null)
948 aSize = annotations.length + 1;
951 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
953 if (pos == -1 || pos >= aSize)
955 temp[aSize - 1] = aa;
964 for (i = 0; i < (aSize - 1); i++, p++)
972 temp[p] = annotations[i];
981 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
983 if (aa == null || annotations == null || annotations.length - 1 < index)
988 int aSize = annotations.length;
989 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
993 for (int i = 0; i < aSize; i++)
1002 temp[i] = annotations[i];
1006 temp[i] = annotations[i - 1];
1015 * returns all annotation on the alignment
1017 public AlignmentAnnotation[] getAlignmentAnnotation()
1023 public boolean isNucleotide()
1029 public boolean hasRNAStructure()
1031 // TODO can it happen that structure is removed from alignment?
1032 return hasRNAStructure;
1036 public void setDataset(AlignmentI data)
1038 if (dataset == null && data == null)
1040 createDatasetAlignment();
1042 else if (dataset == null && data != null)
1046 throw new IllegalArgumentException("Circular dataset reference");
1048 if (!(data instanceof Alignment))
1051 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1053 dataset = (Alignment) data;
1054 for (int i = 0; i < getHeight(); i++)
1056 SequenceI currentSeq = getSequenceAt(i);
1057 SequenceI dsq = currentSeq.getDatasetSequence();
1060 dsq = currentSeq.createDatasetSequence();
1061 dataset.addSequence(dsq);
1065 while (dsq.getDatasetSequence() != null)
1067 dsq = dsq.getDatasetSequence();
1069 if (dataset.findIndex(dsq) == -1)
1071 dataset.addSequence(dsq);
1076 dataset.addAlignmentRef();
1080 * add dataset sequences to seq for currentSeq and any sequences it references
1082 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1083 Set<SequenceI> seqs, boolean createDatasetSequence)
1085 SequenceI alignedSeq = currentSeq;
1086 if (currentSeq.getDatasetSequence() != null)
1088 currentSeq = currentSeq.getDatasetSequence();
1092 if (createDatasetSequence)
1094 currentSeq = currentSeq.createDatasetSequence();
1098 List<SequenceI> toProcess = new ArrayList<>();
1099 toProcess.add(currentSeq);
1100 while (toProcess.size() > 0)
1103 SequenceI curDs = toProcess.remove(0);
1105 if (!seqs.add(curDs))
1109 // iterate over database references, making sure we add forward referenced
1111 if (curDs.getDBRefs() != null)
1113 for (DBRefEntry dbr : curDs.getDBRefs())
1115 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1117 if (dbr.getMap().getTo() == alignedSeq)
1120 * update mapping to be to the newly created dataset sequence
1122 dbr.getMap().setTo(currentSeq);
1124 if (dbr.getMap().getTo().getDatasetSequence() != null)
1126 throw new Error("Implementation error: Map.getTo() for dbref "
1127 + dbr + " from " + curDs.getName()
1128 + " is not a dataset sequence.");
1130 // we recurse to add all forward references to dataset sequences via
1132 toProcess.add(dbr.getMap().getTo());
1140 * Creates a new dataset for this alignment. Can only be done once - if
1141 * dataset is not null this will not be performed.
1143 public void createDatasetAlignment()
1145 if (dataset != null)
1149 // try to avoid using SequenceI.equals at this stage, it will be expensive
1150 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1152 for (int i = 0; i < getHeight(); i++)
1154 SequenceI currentSeq = getSequenceAt(i);
1155 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1158 // verify all mappings are in dataset
1159 for (AlignedCodonFrame cf : codonFrameList)
1161 for (SequenceMapping ssm : cf.getMappings())
1163 if (!seqs.contains(ssm.getFromSeq()))
1165 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1167 if (!seqs.contains(ssm.getMapping().getTo()))
1169 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1173 // finally construct dataset
1174 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1175 // move mappings to the dataset alignment
1176 dataset.codonFrameList = this.codonFrameList;
1177 this.codonFrameList = null;
1181 * reference count for number of alignments referencing this one.
1183 int alignmentRefs = 0;
1186 * increase reference count to this alignment.
1188 private void addAlignmentRef()
1194 public Alignment getDataset()
1200 public boolean padGaps()
1202 boolean modified = false;
1204 // Remove excess gaps from the end of alignment
1208 for (int i = 0; i < sequences.size(); i++)
1210 current = getSequenceAt(i);
1211 for (int j = current.getLength(); j > maxLength; j--)
1214 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
1225 for (int i = 0; i < sequences.size(); i++)
1227 current = getSequenceAt(i);
1228 cLength = current.getLength();
1230 if (cLength < maxLength)
1232 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
1235 else if (current.getLength() > maxLength)
1237 current.deleteChars(maxLength, current.getLength());
1244 * Justify the sequences to the left or right by deleting and inserting gaps
1245 * before the initial residue or after the terminal residue
1248 * true if alignment padded to right, false to justify to left
1249 * @return true if alignment was changed
1252 public boolean justify(boolean right)
1254 boolean modified = false;
1256 // Remove excess gaps from the end of alignment
1258 int ends[] = new int[sequences.size() * 2];
1260 for (int i = 0; i < sequences.size(); i++)
1262 current = getSequenceAt(i);
1263 // This should really be a sequence method
1264 ends[i * 2] = current.findIndex(current.getStart());
1265 ends[i * 2 + 1] = current
1266 .findIndex(current.getStart() + current.getLength());
1267 boolean hitres = false;
1268 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1270 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1279 ends[i * 2 + 1] = j;
1280 if (j - ends[i * 2] > maxLength)
1282 maxLength = j - ends[i * 2];
1290 // now edit the flanking gaps to justify to either left or right
1291 int cLength, extent, diff;
1292 for (int i = 0; i < sequences.size(); i++)
1294 current = getSequenceAt(i);
1296 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1297 diff = maxLength - cLength; // number of gaps to indent
1298 extent = current.getLength();
1302 if (extent > ends[i * 2 + 1])
1304 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1307 if (ends[i * 2] > diff)
1309 current.deleteChars(0, ends[i * 2] - diff);
1314 if (ends[i * 2] < diff)
1316 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1324 if (ends[i * 2] > 0)
1326 current.deleteChars(0, ends[i * 2]);
1328 ends[i * 2 + 1] -= ends[i * 2];
1329 extent -= ends[i * 2];
1331 if (extent > maxLength)
1333 current.deleteChars(maxLength + 1, extent);
1338 if (extent < maxLength)
1340 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1350 public HiddenSequences getHiddenSequences()
1352 return hiddenSequences;
1356 public HiddenColumns getHiddenColumns()
1362 public CigarArray getCompactAlignment()
1364 synchronized (sequences)
1366 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1368 for (SequenceI seq : sequences)
1370 alseqs[i++] = new SeqCigar(seq);
1372 CigarArray cal = new CigarArray(alseqs);
1373 cal.addOperation(CigarArray.M, getWidth());
1379 public void setProperty(Object key, Object value)
1381 if (alignmentProperties == null)
1383 alignmentProperties = new Hashtable();
1386 alignmentProperties.put(key, value);
1390 public Object getProperty(Object key)
1392 if (alignmentProperties != null)
1394 return alignmentProperties.get(key);
1403 public Hashtable getProperties()
1405 return alignmentProperties;
1409 * Adds the given mapping to the stored set. Note this may be held on the
1410 * dataset alignment.
1413 public void addCodonFrame(AlignedCodonFrame codons)
1415 List<AlignedCodonFrame> acfs = getCodonFrames();
1416 if (codons != null && acfs != null && !acfs.contains(codons))
1426 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1429 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1435 List<AlignedCodonFrame> cframes = new ArrayList<>();
1436 for (AlignedCodonFrame acf : getCodonFrames())
1438 if (acf.involvesSequence(seq))
1447 * Sets the codon frame mappings (replacing any existing mappings). Note the
1448 * mappings are set on the dataset alignment instead if there is one.
1450 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1453 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1455 if (dataset != null)
1457 dataset.setCodonFrames(acfs);
1461 this.codonFrameList = acfs;
1466 * Returns the set of codon frame mappings. Any changes to the returned set
1467 * will affect the alignment. The mappings are held on (and read from) the
1468 * dataset alignment if there is one.
1470 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1473 public List<AlignedCodonFrame> getCodonFrames()
1475 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1476 // this behaviour is currently incorrect. method should return codon frames
1477 // for just the alignment,
1478 // selected from dataset
1479 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1483 * Removes the given mapping from the stored set. Note that the mappings are
1484 * held on the dataset alignment if there is one.
1487 public boolean removeCodonFrame(AlignedCodonFrame codons)
1489 List<AlignedCodonFrame> acfs = getCodonFrames();
1490 if (codons == null || acfs == null)
1494 return acfs.remove(codons);
// Merges the content of 'toappend' into this alignment. The visible calls add
// its sequences (addSequence), annotations (addAnnotation), codon frames
// (addAll on getCodonFrames()), hidden sequences (hideSequence) and
// properties (setProperty); its groups are also iterated.
1498 public void append(AlignmentI toappend)
1500 // TODO JAL-1270 needs test coverage
1501 // currently tested for use in jalview.gui.SequenceFetcher
// Remember the other alignment's gap character and whether it matches ours.
1502 char oldc = toappend.getGapCharacter();
1503 boolean samegap = oldc == getGapCharacter();
1504 boolean hashidden = toappend.getHiddenSequences() != null
1505 && toappend.getHiddenSequences().hiddenSequences != null;
1506 // get all sequences including any hidden ones
1507 List<SequenceI> sqs = (hashidden)
1508 ? toappend.getHiddenSequences().getFullAlignment()
1510 : toappend.getSequences();
1513 // avoid self append deadlock by
// The sequences are first copied into a private list, then added in a second loop.
1514 List<SequenceI> toappendsq = new ArrayList<>();
1517 for (SequenceI addedsq : sqs)
// Converts the other alignment's gap character to ours; presumably guarded by
// 'samegap' - the guard line is not visible in this view.
1521 addedsq.replace(oldc, gapCharacter);
1523 toappendsq.add(addedsq);
1526 for (SequenceI addedsq : toappendsq)
1528 addSequence(addedsq);
// The null check in the loop condition tolerates an alignment with no annotation array.
1531 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1532 for (int a = 0; alan != null && a < alan.length; a++)
1534 addAnnotation(alan[a]);
1538 getCodonFrames().addAll(toappend.getCodonFrames());
1540 List<SequenceGroup> sg = toappend.getGroups();
1543 for (SequenceGroup _sg : sg)
1548 if (toappend.getHiddenSequences() != null)
1550 HiddenSequences hs = toappend.getHiddenSequences();
// Create our own hidden-sequence store on demand before hiding anything in it.
1551 if (hiddenSequences == null)
1553 hiddenSequences = new HiddenSequences(this);
1555 if (hs.hiddenSequences != null)
1557 for (int s = 0; s < hs.hiddenSequences.length; s++)
1559 // hide the newly appended sequence in the alignment
1560 if (hs.hiddenSequences[s] != null)
1562 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1567 if (toappend.getProperties() != null)
1569 // we really can't do very much here - just try to concatenate strings
1570 // where property collisions occur.
1571 Enumeration key = toappend.getProperties().keys();
1572 while (key.hasMoreElements())
1574 Object k = key.nextElement();
1575 Object ourval = this.getProperty(k);
1576 Object toapprop = toappend.getProperty(k);
// Collision handling applies only when both values have the same class and differ.
1579 if (ourval.getClass().equals(toapprop.getClass())
1580 && !ourval.equals(toapprop))
1582 if (ourval instanceof String)
// String collisions are joined with "; " between our value and theirs.
1586 ((String) ourval) + "; " + ((String) toapprop));
1590 if (ourval instanceof Vector)
1593 Enumeration theirv = ((Vector) toapprop).elements();
// NOTE(review): as visible here the loop adds the Enumeration object itself
// and never calls theirv.nextElement(), so hasMoreElements() would stay true
// for a non-empty Vector. This looks like a non-terminating loop; the intent
// was probably addElement(theirv.nextElement()) - confirm against the full source.
1594 while (theirv.hasMoreElements())
1596 ((Vector) ourval).addElement(theirv);
1604 // just add new property directly
1605 setProperty(k, toapprop);
1613 public AlignmentAnnotation findOrCreateAnnotation(String name,
1614 String calcId, boolean autoCalc, SequenceI seqRef,
1615 SequenceGroup groupRef)
1617 if (annotations != null)
1619 for (AlignmentAnnotation annot : getAlignmentAnnotation())
1621 if (annot.autoCalculated == autoCalc && (name.equals(annot.label))
1622 && (calcId == null || annot.getCalcId().equals(calcId))
1623 && annot.sequenceRef == seqRef
1624 && annot.groupRef == groupRef)
1630 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1631 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1632 annot.hasText = false;
1635 annot.setCalcId(new String(calcId));
1637 annot.autoCalculated = autoCalc;
1640 annot.setSequenceRef(seqRef);
1642 annot.groupRef = groupRef;
1643 addAnnotation(annot);
// Looks up annotations by calcId by delegating to
// AlignmentAnnotation.findAnnotation over this alignment's annotation array.
// Returns an empty list when getAlignmentAnnotation() yields null.
1649 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1651 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1652 if (alignmentAnnotation != null)
// Note: getAlignmentAnnotation() is called a second time here rather than
// reusing the local fetched above.
1654 return AlignmentAnnotation.findAnnotation(
1655 Arrays.asList(getAlignmentAnnotation()), calcId);
// No annotation array: hand back an empty, fixed-size list.
1657 return Arrays.asList(new AlignmentAnnotation[] {});
1661 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1662 String calcId, String label)
1664 return AlignmentAnnotation.findAnnotations(
1665 Arrays.asList(getAlignmentAnnotation()), seq, calcId, label);
// Shifts the sequences belonging to group sg by one row within 'sequences',
// swapping each with its neighbour. Two loops are visible: one walking
// forwards from index 1 and swapping with i - 1, one walking backwards from
// size - 2 and swapping with i + 1; presumably selected by 'up' (the
// branch line is not visible in this view).
1669 public void moveSelectedSequencesByOne(SequenceGroup sg,
1670 Map<SequenceI, SequenceCollectionI> map, boolean up)
1672 synchronized (sequences)
// Forward pass: starts at 1 because the row at index 0 has no row above it.
1677 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1679 SequenceI seq = sequences.get(i);
1680 if (!sg.getSequences(map).contains(seq))
1685 SequenceI temp = sequences.get(i - 1);
// NOTE(review): this membership test passes null, while the other three
// tests in this method pass 'map' - confirm whether that is intentional.
1686 if (sg.getSequences(null).contains(temp))
// Swap the group member with the row above it.
1691 sequences.set(i, temp);
1692 sequences.set(i - 1, seq);
// Backward pass: starts at size - 2 because the last row has no row below it.
1697 for (int i = sequences.size() - 2; i > -1; i--)
1699 SequenceI seq = sequences.get(i);
1700 if (!sg.getSequences(map).contains(seq))
1705 SequenceI temp = sequences.get(i + 1);
1706 if (sg.getSequences(map).contains(temp))
// Swap the group member with the row below it.
1711 sequences.set(i, temp);
1712 sequences.set(i + 1, seq);
1720 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1722 alignmentAnnotation.validateRangeAndDisplay();
1723 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1725 hasRNAStructure = true;
1729 private SequenceI seqrep = null;
1733 * @return the representative sequence for this group
1736 public SequenceI getSeqrep()
1742 * set the representative sequence for this group. Note - this affects the
1743 * interpretation of the Hidereps attribute.
1746 * the seqrep to set (null means no sequence representative)
1749 public void setSeqrep(SequenceI seqrep)
1751 this.seqrep = seqrep;
1756 * @return true if group has a sequence representative
1759 public boolean hasSeqrep()
1761 return seqrep != null;
1765 public int getEndRes()
1767 return getWidth() - 1;
1771 public int getStartRes()
1777 * In the case of AlignmentI - returns the dataset for the alignment, if set
1780 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
1783 public AnnotatedCollectionI getContext()
1789 * Align this alignment like the given (mapped) one.
1792 public int alignAs(AlignmentI al)
1795 * Currently retains unmapped gaps (in introns), regaps mapped regions
1798 return alignAs(al, false, true);
1802 * Align this alignment 'the same as' the given one. Mapped sequences only are
1803 * realigned. If both of the same type (nucleotide/protein) then align both
1804 * identically. If this is nucleotide and the other is protein, make 3 gaps
1805 * for each gap in the protein sequences. If this is protein and the other is
1806 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1807 * nucleotide bases. If this is protein and the other is nucleotide, gap the
1808 * protein to match the relative ordering of codons in the nucleotide.
1810 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1811 * regions are preserved. Gaps that connect introns to exons are treated
1812 * conservatively, i.e. only preserved if both intron and exon gaps are
1813 * preserved. TODO: check caveats below where the implementation fails
1816 * - must have same dataset, and sequences in al must have equivalent
1817 * dataset sequence and start/end bounds under given mapping
1818 * @param preserveMappedGaps
1819 * if true, gaps within and between mapped codons are preserved
1820 * @param preserveUnmappedGaps
1821 * if true, gaps within and between unmapped codons are preserved
// Aligns this alignment like 'al' by dispatching to one of three
// AlignmentUtils routines, chosen from the nucleotide/protein type of each side.
// Neither preserveMappedGaps nor preserveUnmappedGaps is read in the visible body.
// Returns whatever the chosen AlignmentUtils method returns.
1824 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1825 boolean preserveUnmappedGaps)
1827 // TODO should this method signature be the one in the interface?
1828 // JBPComment - yes - neither flag is used, so should be deleted.
1829 boolean thisIsNucleotide = this.isNucleotide();
1830 boolean thatIsProtein = !al.isNucleotide();
// This is protein and the other is nucleotide.
1831 if (!thatIsProtein && !thisIsNucleotide)
1833 return AlignmentUtils.alignProteinAsDna(this, al);
// This is nucleotide and the other is protein.
1835 else if (thatIsProtein && thisIsNucleotide)
1837 return AlignmentUtils.alignCdsAsProtein(this, al);
// Both sides are the same type: use the general routine.
1839 return AlignmentUtils.alignAs(this, al);
1843 * Returns the alignment in Fasta format. Behaviour of this method is not
1844 * guaranteed between versions.
1847 public String toString()
1849 return new FastaFile().print(getSequencesArray(), true);
1853 * Returns the set of distinct sequence names. No ordering is guaranteed.
1856 public Set<String> getSequenceNames()
1858 Set<String> names = new HashSet<>();
1859 for (SequenceI seq : getSequences())
1861 names.add(seq.getName());
1867 public boolean hasValidSequence()
1869 boolean hasValidSeq = false;
1870 for (SequenceI seq : getSequences())
1872 if ((seq.getEnd() - seq.getStart()) > 0)
1882 * Update any mappings to 'virtual' sequences to compatible real ones, if
1883 * present in the added sequences. Returns a count of mappings updated.
1889 public int realiseMappings(List<SequenceI> seqs)
1892 for (SequenceI seq : seqs)
1894 for (AlignedCodonFrame mapping : getCodonFrames())
1896 count += mapping.realiseWith(seq);
1903 * Returns the first AlignedCodonFrame that has a mapping between the given
1911 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1913 for (AlignedCodonFrame acf : getCodonFrames())
1915 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
1924 public void setHiddenColumns(HiddenColumns cols)
1930 public void setupJPredAlignment()
1932 SequenceI repseq = getSequenceAt(0);
1934 HiddenColumns cs = new HiddenColumns();
1935 cs.hideList(repseq.getInsertions());
1936 setHiddenColumns(cs);
1940 public HiddenColumns propagateInsertions(SequenceI profileseq,
1941 AlignmentView input)
1945 char gc = getGapCharacter();
1946 Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc);
1947 HiddenColumns nview = (HiddenColumns) alandhidden[1];
1948 SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos];
1949 return propagateInsertions(profileseq, origseq, nview);
1955 * sequence in al which corresponds to origseq
1957 * alignment which is to have gaps inserted into it
1959 * sequence corresponding to profileseq which defines gap map for
1962 private HiddenColumns propagateInsertions(SequenceI profileseq,
1963 SequenceI origseq, HiddenColumns hc)
1965 // take the set of hidden columns, and the set of gaps in origseq,
1966 // and remove all the hidden gaps from hiddenColumns
1968 // first get the gaps as a Bitset
1969 // then calculate hidden ^ not(gap)
1970 BitSet gaps = origseq.gapBitset();
1973 // for each sequence in the alignment, except the profile sequence,
1974 // insert gaps corresponding to each hidden region but where each hidden
1975 // column region is shifted backwards by the number of preceding visible
1976 // gaps update hidden columns at the same time
1977 HiddenColumns newhidden = new HiddenColumns();
1979 int numGapsBefore = 0;
1980 int gapPosition = 0;
1981 Iterator<int[]> it = hc.iterator();
1982 while (it.hasNext())
1984 int[] region = it.next();
1986 // get region coordinates accounting for gaps
1987 // we can rely on gaps not being *in* hidden regions because we already
1989 while (gapPosition < region[0])
1992 if (gaps.get(gapPosition))
1998 int left = region[0] - numGapsBefore;
1999 int right = region[1] - numGapsBefore;
2001 newhidden.hideColumns(left, right);
2002 padGaps(left, right, profileseq);
2008 * Pad gaps in all sequences in alignment except profileseq
2011 * position of first gap to insert
2013 * position of last gap to insert
2015 * sequence not to pad
2017 private void padGaps(int left, int right, SequenceI profileseq)
2019 char gc = getGapCharacter();
2021 // make a string with number of gaps = length of hidden region
2022 StringBuilder sb = new StringBuilder();
2023 for (int g = 0; g < right - left + 1; g++)
2028 // loop over the sequences and pad with gaps where required
2029 for (int s = 0, ns = getHeight(); s < ns; s++)
2031 SequenceI sqobj = getSequenceAt(s);
2032 if ((sqobj != profileseq) && (sqobj.getLength() >= left))
2034 String sq = sqobj.getSequenceAsString();
2036 sq.substring(0, left) + sb.toString() + sq.substring(left));