2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.analysis.Conservation;
25 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
26 import jalview.io.FastaFile;
27 import jalview.util.Comparison;
28 import jalview.util.LinkedIdentityHashSet;
29 import jalview.util.MessageManager;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.BitSet;
34 import java.util.Collections;
35 import java.util.Enumeration;
36 import java.util.HashSet;
37 import java.util.Hashtable;
38 import java.util.Iterator;
39 import java.util.List;
42 import java.util.Vector;
45 * Data structure to hold and manipulate a multiple sequence alignment
51 public class Alignment implements AlignmentI, AutoCloseable
53 private Alignment dataset;
55 private List<SequenceI> sequences;
57 protected List<SequenceGroup> groups;
59 protected char gapCharacter = '-';
61 private boolean nucleotide = true;
63 public boolean hasRNAStructure = false;
65 public AlignmentAnnotation[] annotations;
67 HiddenSequences hiddenSequences;
69 HiddenColumns hiddenCols;
71 public Hashtable alignmentProperties;
73 private List<AlignedCodonFrame> codonFrameList;
75 private Conservation conservation;
77 private ProfilesI consensus;
79 private Hashtable[] codonConsensus, rnaStructureConsensus;
81 private void initAlignment(SequenceI[] seqs)
83 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
84 hiddenSequences = new HiddenSequences(this);
85 hiddenCols = new HiddenColumns();
86 codonFrameList = new ArrayList<>();
88 nucleotide = Comparison.isNucleotide(seqs);
90 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
92 for (int i = 0; i < seqs.length; i++)
94 sequences.add(seqs[i]);
100 * Make a 'copy' alignment - sequences have new copies of features and
101 * annotations, but share the original dataset sequences.
103 public Alignment(AlignmentI al)
105 SequenceI[] seqs = al.getSequencesArray();
106 for (int i = 0; i < seqs.length; i++)
108 seqs[i] = new Sequence(seqs[i]);
114 * Share the same dataset sequence mappings (if any).
116 if (dataset == null && al.getDataset() == null)
118 this.setCodonFrames(al.getCodonFrames());
123 * Make an alignment from an array of Sequences.
127 public Alignment(SequenceI[] seqs)
133 * Make a new alignment from an array of SeqCigars
138 public Alignment(SeqCigar[] alseqs)
140 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
141 gapCharacter, new HiddenColumns(), null);
146 * Make a new alignment from an CigarArray JBPNote - can only do this when
147 * compactAlignment does not contain hidden regions. JBPNote - must also check
148 * that compactAlignment resolves to a set of SeqCigars - or construct them
151 * @param compactAlignment
154 public static AlignmentI createAlignment(CigarArray compactAlignment)
156 throw new Error(MessageManager
157 .getString("error.alignment_cigararray_not_implemented"));
158 // this(compactAlignment.refCigars);
162 public List<SequenceI> getSequences()
168 public List<SequenceI> getSequences(
169 Map<SequenceI, SequenceCollectionI> hiddenReps)
171 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
177 public SequenceI[] getSequencesArray()
179 if (sequences == null)
183 synchronized (sequences)
185 return sequences.toArray(new SequenceI[sequences.size()]);
190 * Returns a map of lists of sequences keyed by sequence name.
195 public Map<String, List<SequenceI>> getSequencesByName()
197 return AlignmentUtils.getSequencesByName(this);
201 public SequenceI getSequenceAt(int i)
203 synchronized (sequences)
205 if (i > -1 && i < sequences.size())
207 return sequences.get(i);
215 public SequenceI getSequenceAtAbsoluteIndex(int i)
217 SequenceI seq = null;
218 if (getHiddenSequences().getSize() > 0)
220 seq = getHiddenSequences().getHiddenSequence(i);
223 // didn't find the sequence in the hidden sequences, get it from the
225 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
226 seq = getSequenceAt(index);
231 seq = getSequenceAt(i);
237 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
238 * this currently does not recalculate whether or not the alignment is
239 * nucleotide, so mixed alignments may have undefined behaviour.
244 public void addSequence(SequenceI snew)
249 // maintain dataset integrity
250 SequenceI dsseq = snew.getDatasetSequence();
253 // derive new sequence
254 SequenceI adding = snew.deriveSequence();
256 dsseq = snew.getDatasetSequence();
258 if (getDataset().findIndex(dsseq) == -1)
260 getDataset().addSequence(dsseq);
264 if (sequences == null)
266 initAlignment(new SequenceI[] { snew });
270 synchronized (sequences)
275 if (hiddenSequences != null)
277 hiddenSequences.adjustHeightSequenceAdded();
282 public SequenceI replaceSequenceAt(int i, SequenceI snew)
284 synchronized (sequences)
286 if (sequences.size() > i)
288 return sequences.set(i, snew);
294 hiddenSequences.adjustHeightSequenceAdded();
303 * @return DOCUMENT ME!
306 public List<SequenceGroup> getGroups()
314 if (getDataset() != null)
318 getDataset().removeAlignmentRef();
319 } catch (Throwable e)
329 * Defensively nulls out references in case this object is not garbage
332 void nullReferences()
338 hiddenSequences = null;
342 * decrement the alignmentRefs counter by one and null references if it goes
347 private void removeAlignmentRef() throws Throwable
349 if (--alignmentRefs == 0)
356 public void deleteSequence(SequenceI s)
358 synchronized (sequences)
360 deleteSequence(findIndex(s));
365 public void deleteSequence(int i)
367 synchronized (sequences)
369 if (i > -1 && i < getHeight())
372 hiddenSequences.adjustHeightSequenceDeleted(i);
378 public void deleteHiddenSequence(int i)
380 synchronized (sequences)
382 if (i > -1 && i < getHeight())
392 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
395 public SequenceGroup findGroup(SequenceI seq, int position)
397 synchronized (groups)
399 for (SequenceGroup sg : groups)
401 if (sg.getSequences(null).contains(seq))
403 if (position >= sg.getStartRes() && position <= sg.getEndRes())
417 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
420 public SequenceGroup[] findAllGroups(SequenceI s)
422 ArrayList<SequenceGroup> temp = new ArrayList<>();
424 synchronized (groups)
426 int gSize = groups.size();
427 for (int i = 0; i < gSize; i++)
429 SequenceGroup sg = groups.get(i);
430 if (sg == null || sg.getSequences() == null)
432 this.deleteGroup(sg);
437 if (sg.getSequences().contains(s))
443 SequenceGroup[] ret = new SequenceGroup[temp.size()];
444 return temp.toArray(ret);
449 public void addGroup(SequenceGroup sg)
451 synchronized (groups)
453 if (!groups.contains(sg))
455 if (hiddenSequences.getSize() > 0)
457 int i, iSize = sg.getSize();
458 for (i = 0; i < iSize; i++)
460 if (!sequences.contains(sg.getSequenceAt(i)))
462 sg.deleteSequence(sg.getSequenceAt(i), false);
468 if (sg.getSize() < 1)
473 sg.setContext(this, true);
480 * remove any annotation that references gp
483 * (if null, removes all group associated annotation)
485 private void removeAnnotationForGroup(SequenceGroup gp)
487 if (annotations == null || annotations.length == 0)
491 // remove annotation very quickly
492 AlignmentAnnotation[] t,
493 todelete = new AlignmentAnnotation[annotations.length],
494 tokeep = new AlignmentAnnotation[annotations.length];
498 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
500 if (annotations[i].groupRef != null)
502 todelete[p++] = annotations[i];
506 tokeep[k++] = annotations[i];
512 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
514 if (annotations[i].groupRef == gp)
516 todelete[p++] = annotations[i];
520 tokeep[k++] = annotations[i];
526 // clear out the group associated annotation.
527 for (i = 0; i < p; i++)
529 unhookAnnotation(todelete[i]);
532 t = new AlignmentAnnotation[k];
533 for (i = 0; i < k; i++)
542 public void deleteAllGroups()
544 synchronized (groups)
546 if (annotations != null)
548 removeAnnotationForGroup(null);
550 for (SequenceGroup sg : groups)
552 sg.setContext(null, false);
560 public void deleteGroup(SequenceGroup g)
562 synchronized (groups)
564 if (groups.contains(g))
566 removeAnnotationForGroup(g);
568 g.setContext(null, false);
575 public SequenceI findName(String name)
577 return findName(name, false);
583 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
586 public SequenceI findName(String token, boolean b)
588 return findName(null, token, b);
594 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
598 public SequenceI findName(SequenceI startAfter, String token, boolean b)
603 String sqname = null;
604 if (startAfter != null)
606 // try to find the sequence in the alignment
607 boolean matched = false;
608 while (i < sequences.size())
610 if (getSequenceAt(i++) == startAfter)
621 while (i < sequences.size())
623 sq = getSequenceAt(i);
624 sqname = sq.getName();
625 if (sqname.equals(token) // exact match
626 || (b && // allow imperfect matches - case varies
627 (sqname.equalsIgnoreCase(token))))
629 return getSequenceAt(i);
639 public SequenceI[] findSequenceMatch(String name)
641 Vector matches = new Vector();
644 while (i < sequences.size())
646 if (getSequenceAt(i).getName().equals(name))
648 matches.addElement(getSequenceAt(i));
653 SequenceI[] result = new SequenceI[matches.size()];
654 for (i = 0; i < result.length; i++)
656 result[i] = (SequenceI) matches.elementAt(i);
666 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
669 public int findIndex(SequenceI s)
673 while (i < sequences.size())
675 if (s == getSequenceAt(i))
690 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
693 public int findIndex(SearchResultsI results)
697 while (i < sequences.size())
699 if (results.involvesSequence(getSequenceAt(i)))
709 public int getHeight()
711 return sequences.size();
715 public int getAbsoluteHeight()
717 return sequences.size() + getHiddenSequences().getSize();
721 public int getWidth()
725 for (int i = 0; i < sequences.size(); i++)
727 maxLength = Math.max(maxLength, getSequenceAt(i).getLength());
733 public int getVisibleWidth()
736 if (hiddenCols != null)
738 w -= hiddenCols.getSize();
750 public void setGapCharacter(char gc)
753 synchronized (sequences)
755 for (SequenceI seq : sequences)
757 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
758 .replace('-', gc).replace(' ', gc));
766 * @return DOCUMENT ME!
769 public char getGapCharacter()
777 * @see jalview.datamodel.AlignmentI#isAligned()
780 public boolean isAligned()
782 return isAligned(false);
788 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
791 public boolean isAligned(boolean includeHidden)
793 int width = getWidth();
794 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
796 includeHidden = true; // no hidden sequences to check against.
798 for (int i = 0; i < sequences.size(); i++)
800 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
802 if (getSequenceAt(i).getLength() != width)
813 public boolean isHidden(int alignmentIndex)
815 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
819 * Delete all annotations, including auto-calculated if the flag is set true.
820 * Returns true if at least one annotation was deleted, else false.
822 * @param includingAutoCalculated
826 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
828 boolean result = false;
829 for (AlignmentAnnotation alan : getAlignmentAnnotation())
831 if (!alan.autoCalculated || includingAutoCalculated)
833 deleteAnnotation(alan);
843 * @seejalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
844 * AlignmentAnnotation)
847 public boolean deleteAnnotation(AlignmentAnnotation aa)
849 return deleteAnnotation(aa, true);
853 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
857 if (annotations != null)
859 aSize = annotations.length;
867 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
869 boolean swap = false;
872 for (int i = 0; i < aSize; i++)
874 if (annotations[i] == aa)
879 if (tIndex < temp.length)
881 temp[tIndex++] = annotations[i];
890 unhookAnnotation(aa);
897 * remove any object references associated with this annotation
901 private void unhookAnnotation(AlignmentAnnotation aa)
903 if (aa.sequenceRef != null)
905 aa.sequenceRef.removeAlignmentAnnotation(aa);
907 if (aa.groupRef != null)
909 // probably need to do more here in the future (post 2.5.0)
917 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
918 * AlignmentAnnotation)
921 public void addAnnotation(AlignmentAnnotation aa)
923 addAnnotation(aa, -1);
929 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
930 * AlignmentAnnotation, int)
933 public void addAnnotation(AlignmentAnnotation aa, int pos)
935 if (aa.getRNAStruc() != null)
937 hasRNAStructure = true;
941 if (annotations != null)
943 aSize = annotations.length + 1;
946 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
948 if (pos == -1 || pos >= aSize)
950 temp[aSize - 1] = aa;
959 for (i = 0; i < (aSize - 1); i++, p++)
967 temp[p] = annotations[i];
976 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
978 if (aa == null || annotations == null || annotations.length - 1 < index)
983 int aSize = annotations.length;
984 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
988 for (int i = 0; i < aSize; i++)
997 temp[i] = annotations[i];
1001 temp[i] = annotations[i - 1];
1010 * returns all annotation on the alignment
1012 public AlignmentAnnotation[] getAlignmentAnnotation()
1018 public boolean isNucleotide()
1024 public boolean hasRNAStructure()
1026 // TODO can it happen that structure is removed from alignment?
1027 return hasRNAStructure;
1031 public void setDataset(AlignmentI data)
1033 if (dataset == null && data == null)
1035 createDatasetAlignment();
1037 else if (dataset == null && data != null)
1041 throw new IllegalArgumentException("Circular dataset reference");
1043 if (!(data instanceof Alignment))
1046 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1048 dataset = (Alignment) data;
1049 for (int i = 0; i < getHeight(); i++)
1051 SequenceI currentSeq = getSequenceAt(i);
1052 SequenceI dsq = currentSeq.getDatasetSequence();
1055 dsq = currentSeq.createDatasetSequence();
1056 dataset.addSequence(dsq);
1060 while (dsq.getDatasetSequence() != null)
1062 dsq = dsq.getDatasetSequence();
1064 if (dataset.findIndex(dsq) == -1)
1066 dataset.addSequence(dsq);
1071 dataset.addAlignmentRef();
1075 * add dataset sequences to seqs for currentSeq and any sequences it references
1077 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1078 Set<SequenceI> seqs, boolean createDatasetSequence)
1080 SequenceI alignedSeq = currentSeq;
1081 if (currentSeq.getDatasetSequence() != null)
1083 currentSeq = currentSeq.getDatasetSequence();
1087 if (createDatasetSequence)
1089 currentSeq = currentSeq.createDatasetSequence();
1093 List<SequenceI> toProcess = new ArrayList<>();
1094 toProcess.add(currentSeq);
1095 while (toProcess.size() > 0)
1098 SequenceI curDs = toProcess.remove(0);
1100 if (!seqs.add(curDs))
1104 // iterate over database references, making sure we add forward referenced
1106 if (curDs.getDBRefs() != null)
1108 for (DBRefEntry dbr : curDs.getDBRefs())
1110 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1112 if (dbr.getMap().getTo() == alignedSeq)
1115 * update mapping to be to the newly created dataset sequence
1117 dbr.getMap().setTo(currentSeq);
1119 if (dbr.getMap().getTo().getDatasetSequence() != null)
1121 throw new Error("Implementation error: Map.getTo() for dbref "
1122 + dbr + " from " + curDs.getName()
1123 + " is not a dataset sequence.");
1125 // we recurse to add all forward references to dataset sequences via
1127 toProcess.add(dbr.getMap().getTo());
1135 * Creates a new dataset for this alignment. Can only be done once - if
1136 * dataset is not null this will not be performed.
1138 public void createDatasetAlignment()
1140 if (dataset != null)
1144 // try to avoid using SequenceI.equals at this stage, it will be expensive
1145 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1147 for (int i = 0; i < getHeight(); i++)
1149 SequenceI currentSeq = getSequenceAt(i);
1150 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1153 // verify all mappings are in dataset
1154 for (AlignedCodonFrame cf : codonFrameList)
1156 for (SequenceToSequenceMapping ssm : cf.getMappings())
1158 if (!seqs.contains(ssm.getFromSeq()))
1160 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1162 if (!seqs.contains(ssm.getMapping().getTo()))
1164 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1168 // finally construct dataset
1169 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1170 // move mappings to the dataset alignment
1171 dataset.codonFrameList = this.codonFrameList;
1172 this.codonFrameList = null;
1176 * reference count for number of alignments referencing this one.
1178 int alignmentRefs = 0;
1181 * increase reference count to this alignment.
1183 private void addAlignmentRef()
1189 public Alignment getDataset()
1195 public boolean padGaps()
1197 boolean modified = false;
1199 // Remove excess gaps from the end of alignment
1203 for (int i = 0; i < sequences.size(); i++)
1205 current = getSequenceAt(i);
1206 for (int j = current.getLength(); j > maxLength; j--)
1209 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
1220 for (int i = 0; i < sequences.size(); i++)
1222 current = getSequenceAt(i);
1223 cLength = current.getLength();
1225 if (cLength < maxLength)
1227 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
1230 else if (current.getLength() > maxLength)
1232 current.deleteChars(maxLength, current.getLength());
1239 * Justify the sequences to the left or right by deleting and inserting gaps
1240 * before the initial residue or after the terminal residue
1243 * true if alignment padded to right, false to justify to left
1244 * @return true if alignment was changed
1247 public boolean justify(boolean right)
1249 boolean modified = false;
1251 // Remove excess gaps from the end of alignment
1253 int ends[] = new int[sequences.size() * 2];
1255 for (int i = 0; i < sequences.size(); i++)
1257 current = getSequenceAt(i);
1258 // This should really be a sequence method
1259 ends[i * 2] = current.findIndex(current.getStart());
1260 ends[i * 2 + 1] = current
1261 .findIndex(current.getStart() + current.getLength());
1262 boolean hitres = false;
1263 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1265 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1274 ends[i * 2 + 1] = j;
1275 if (j - ends[i * 2] > maxLength)
1277 maxLength = j - ends[i * 2];
1285 // now edit the flanking gaps to justify to either left or right
1286 int cLength, extent, diff;
1287 for (int i = 0; i < sequences.size(); i++)
1289 current = getSequenceAt(i);
1291 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1292 diff = maxLength - cLength; // number of gaps to indent
1293 extent = current.getLength();
1297 if (extent > ends[i * 2 + 1])
1299 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1302 if (ends[i * 2] > diff)
1304 current.deleteChars(0, ends[i * 2] - diff);
1309 if (ends[i * 2] < diff)
1311 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1319 if (ends[i * 2] > 0)
1321 current.deleteChars(0, ends[i * 2]);
1323 ends[i * 2 + 1] -= ends[i * 2];
1324 extent -= ends[i * 2];
1326 if (extent > maxLength)
1328 current.deleteChars(maxLength + 1, extent);
1333 if (extent < maxLength)
1335 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1345 public HiddenSequences getHiddenSequences()
1347 return hiddenSequences;
1351 public HiddenColumns getHiddenColumns()
1357 public CigarArray getCompactAlignment()
1359 synchronized (sequences)
1361 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1363 for (SequenceI seq : sequences)
1365 alseqs[i++] = new SeqCigar(seq);
1367 CigarArray cal = new CigarArray(alseqs);
1368 cal.addOperation(CigarArray.M, getWidth());
1374 public void setProperty(Object key, Object value)
1376 if (alignmentProperties == null)
1378 alignmentProperties = new Hashtable();
1381 alignmentProperties.put(key, value);
1385 public Object getProperty(Object key)
1387 if (alignmentProperties != null)
1389 return alignmentProperties.get(key);
1398 public Hashtable getProperties()
1400 return alignmentProperties;
1404 * Adds the given mapping to the stored set. Note this may be held on the
1405 * dataset alignment.
1408 public void addCodonFrame(AlignedCodonFrame codons)
1410 List<AlignedCodonFrame> acfs = getCodonFrames();
1411 if (codons != null && acfs != null && !acfs.contains(codons))
1421 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1424 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1430 List<AlignedCodonFrame> cframes = new ArrayList<>();
1431 for (AlignedCodonFrame acf : getCodonFrames())
1433 if (acf.involvesSequence(seq))
1442 * Sets the codon frame mappings (replacing any existing mappings). Note the
1443 * mappings are set on the dataset alignment instead if there is one.
1445 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1448 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1450 if (dataset != null)
1452 dataset.setCodonFrames(acfs);
1456 this.codonFrameList = acfs;
1461 * Returns the set of codon frame mappings. Any changes to the returned set
1462 * will affect the alignment. The mappings are held on (and read from) the
1463 * dataset alignment if there is one.
1465 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1468 public List<AlignedCodonFrame> getCodonFrames()
1470 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1471 // this behaviour is currently incorrect. method should return codon frames
1472 // for just the alignment,
1473 // selected from dataset
1474 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1478 * Removes the given mapping from the stored set. Note that the mappings are
1479 * held on the dataset alignment if there is one.
1482 public boolean removeCodonFrame(AlignedCodonFrame codons)
1484 List<AlignedCodonFrame> acfs = getCodonFrames();
1485 if (codons == null || acfs == null)
1489 return acfs.remove(codons);
1493 public void append(AlignmentI toappend)
1495 // TODO JAL-1270 needs test coverage
1496 // currently tested for use in jalview.gui.SequenceFetcher
1497 char oldc = toappend.getGapCharacter();
1498 boolean samegap = oldc == getGapCharacter();
1499 boolean hashidden = toappend.getHiddenSequences() != null
1500 && toappend.getHiddenSequences().hiddenSequences != null;
1501 // get all sequences including any hidden ones
1502 List<SequenceI> sqs = (hashidden)
1503 ? toappend.getHiddenSequences().getFullAlignment()
1505 : toappend.getSequences();
1508 // avoid self append deadlock by copying the sequences to a separate list first
1509 List<SequenceI> toappendsq = new ArrayList<>();
1512 for (SequenceI addedsq : sqs)
1516 addedsq.replace(oldc, gapCharacter);
1518 toappendsq.add(addedsq);
1521 for (SequenceI addedsq : toappendsq)
1523 addSequence(addedsq);
1526 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1527 for (int a = 0; alan != null && a < alan.length; a++)
1529 addAnnotation(alan[a]);
1533 getCodonFrames().addAll(toappend.getCodonFrames());
1535 List<SequenceGroup> sg = toappend.getGroups();
1538 for (SequenceGroup _sg : sg)
1543 if (toappend.getHiddenSequences() != null)
1545 HiddenSequences hs = toappend.getHiddenSequences();
1546 if (hiddenSequences == null)
1548 hiddenSequences = new HiddenSequences(this);
1550 if (hs.hiddenSequences != null)
1552 for (int s = 0; s < hs.hiddenSequences.length; s++)
1554 // hide the newly appended sequence in the alignment
1555 if (hs.hiddenSequences[s] != null)
1557 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1562 if (toappend.getProperties() != null)
1564 // we really can't do very much here - just try to concatenate strings
1565 // where property collisions occur.
1566 Enumeration key = toappend.getProperties().keys();
1567 while (key.hasMoreElements())
1569 Object k = key.nextElement();
1570 Object ourval = this.getProperty(k);
1571 Object toapprop = toappend.getProperty(k);
1574 if (ourval.getClass().equals(toapprop.getClass())
1575 && !ourval.equals(toapprop))
1577 if (ourval instanceof String)
1581 ((String) ourval) + "; " + ((String) toapprop));
1585 if (ourval instanceof Vector)
1588 Enumeration theirv = ((Vector) toapprop).elements();
1589 while (theirv.hasMoreElements())
1591 ((Vector) ourval).addElement(theirv);
1599 // just add new property directly
1600 setProperty(k, toapprop);
1608 public AlignmentAnnotation findOrCreateAnnotation(String name,
1609 String calcId, boolean autoCalc, SequenceI seqRef,
1610 SequenceGroup groupRef)
1612 if (annotations != null)
1614 for (AlignmentAnnotation annot : getAlignmentAnnotation())
1616 if (annot.autoCalculated == autoCalc && (name.equals(annot.label))
1617 && (calcId == null || annot.getCalcId().equals(calcId))
1618 && annot.sequenceRef == seqRef
1619 && annot.groupRef == groupRef)
1625 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1626 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1627 annot.hasText = false;
1630 annot.setCalcId(new String(calcId));
1632 annot.autoCalculated = autoCalc;
1635 annot.setSequenceRef(seqRef);
1637 annot.groupRef = groupRef;
1638 addAnnotation(annot);
1644 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1646 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1647 if (alignmentAnnotation != null)
1649 return AlignmentAnnotation.findAnnotation(
1650 Arrays.asList(getAlignmentAnnotation()), calcId);
1652 return Arrays.asList(new AlignmentAnnotation[] {});
1656 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1657 String calcId, String label)
1659 return AlignmentAnnotation.findAnnotations(
1660 Arrays.asList(getAlignmentAnnotation()), seq, calcId, label);
1664 public void moveSelectedSequencesByOne(SequenceGroup sg,
1665 Map<SequenceI, SequenceCollectionI> map, boolean up)
1667 synchronized (sequences)
1672 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1674 SequenceI seq = sequences.get(i);
1675 if (!sg.getSequences(map).contains(seq))
1680 SequenceI temp = sequences.get(i - 1);
1681 if (sg.getSequences(null).contains(temp))
1686 sequences.set(i, temp);
1687 sequences.set(i - 1, seq);
1692 for (int i = sequences.size() - 2; i > -1; i--)
1694 SequenceI seq = sequences.get(i);
1695 if (!sg.getSequences(map).contains(seq))
1700 SequenceI temp = sequences.get(i + 1);
1701 if (sg.getSequences(map).contains(temp))
1706 sequences.set(i, temp);
1707 sequences.set(i + 1, seq);
1715 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1717 alignmentAnnotation.validateRangeAndDisplay();
1718 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1720 hasRNAStructure = true;
1724 private SequenceI seqrep = null;
1728 * @return the representative sequence for this group
1731 public SequenceI getSeqrep()
1737 * set the representative sequence for this group. Note - this affects the
1738 * interpretation of the Hidereps attribute.
1741 * the seqrep to set (null means no sequence representative)
1744 public void setSeqrep(SequenceI seqrep)
1746 this.seqrep = seqrep;
1751 * @return true if group has a sequence representative
1754 public boolean hasSeqrep()
1756 return seqrep != null;
1760 public int getEndRes()
1762 return getWidth() - 1;
1766 public int getStartRes()
1772 * In the case of AlignmentI - returns the dataset for the alignment, if set
1775 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
1778 public AnnotatedCollectionI getContext()
1784 * Align this alignment like the given (mapped) one.
1787 public int alignAs(AlignmentI al)
1790 * Currently retains unmapped gaps (in introns), regaps mapped regions
1793 return alignAs(al, false, true);
1797 * Align this alignment 'the same as' the given one. Mapped sequences only are
1798 * realigned. If both of the same type (nucleotide/protein) then align both
1799 * identically. If this is nucleotide and the other is protein, make 3 gaps
1800 * for each gap in the protein sequences. If this is protein and the other is
1801 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1802 * nucleotide bases. If this is protein and the other is nucleotide, gaps
1803 * protein to match the relative ordering of codons in the nucleotide.
1805 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1806 * regions are preserved. Gaps that connect introns to exons are treated
1807 * conservatively, i.e. only preserved if both intron and exon gaps are
1808 * preserved. TODO: check caveats below where the implementation fails
1811 * - must have same dataset, and sequences in al must have equivalent
1812 * dataset sequence and start/end bounds under given mapping
1813 * @param preserveMappedGaps
1814 * if true, gaps within and between mapped codons are preserved
1815 * @param preserveUnmappedGaps
1816 * if true, gaps within and between unmapped codons are preserved
1819 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1820 boolean preserveUnmappedGaps)
1822 // TODO should this method signature be the one in the interface?
1823 // JBPComment - yes - neither flag is used, so should be deleted.
1824 boolean thisIsNucleotide = this.isNucleotide();
1825 boolean thatIsProtein = !al.isNucleotide();
1826 if (!thatIsProtein && !thisIsNucleotide)
1828 return AlignmentUtils.alignProteinAsDna(this, al);
1830 else if (thatIsProtein && thisIsNucleotide)
1832 return AlignmentUtils.alignCdsAsProtein(this, al);
1834 return AlignmentUtils.alignAs(this, al);
1838 * Returns the alignment in Fasta format. Behaviour of this method is not
1839 * guaranteed between versions.
1842 public String toString()
1844 return new FastaFile().print(getSequencesArray(), true);
1848 * Returns the set of distinct sequence names. No ordering is guaranteed.
1851 public Set<String> getSequenceNames()
1853 Set<String> names = new HashSet<>();
1854 for (SequenceI seq : getSequences())
1856 names.add(seq.getName());
1862 public boolean hasValidSequence()
1864 boolean hasValidSeq = false;
1865 for (SequenceI seq : getSequences())
1867 if ((seq.getEnd() - seq.getStart()) > 0)
1877 * Update any mappings to 'virtual' sequences to compatible real ones, if
1878 * present in the added sequences. Returns a count of mappings updated.
1884 public int realiseMappings(List<SequenceI> seqs)
1887 for (SequenceI seq : seqs)
1889 for (AlignedCodonFrame mapping : getCodonFrames())
1891 count += mapping.realiseWith(seq);
1898 * Returns the first AlignedCodonFrame that has a mapping between the given
1906 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1908 for (AlignedCodonFrame acf : getCodonFrames())
1910 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
1919 public boolean setHiddenColumns(HiddenColumns cols)
1921 boolean changed = cols == null ? hiddenCols != null
1922 : !cols.equals(hiddenCols);
1928 public void setupJPredAlignment()
1930 SequenceI repseq = getSequenceAt(0);
1932 HiddenColumns cs = new HiddenColumns();
1933 cs.hideList(repseq.getInsertions());
1934 setHiddenColumns(cs);
1938 public HiddenColumns propagateInsertions(SequenceI profileseq,
1939 AlignmentView input)
1943 char gc = getGapCharacter();
1944 Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc);
1945 HiddenColumns nview = (HiddenColumns) alandhidden[1];
1946 SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos];
1947 return propagateInsertions(profileseq, origseq, nview);
1953 * sequence in al which corresponds to origseq
1955 * alignment which is to have gaps inserted into it
1957 * sequence corresponding to profileseq which defines gap map for
1960 private HiddenColumns propagateInsertions(SequenceI profileseq,
1961 SequenceI origseq, HiddenColumns hc)
1963 // take the set of hidden columns, and the set of gaps in origseq,
1964 // and remove all the hidden gaps from hiddenColumns
1966 // first get the gaps as a Bitset
1967 // then calculate hidden ^ not(gap)
1968 BitSet gaps = origseq.gapBitset();
1971 // for each sequence in the alignment, except the profile sequence,
1972 // insert gaps corresponding to each hidden region but where each hidden
1973 // column region is shifted backwards by the number of preceding visible
1974 // gaps update hidden columns at the same time
1975 HiddenColumns newhidden = new HiddenColumns();
1977 int numGapsBefore = 0;
1978 int gapPosition = 0;
1979 Iterator<int[]> it = hc.iterator();
1980 while (it.hasNext())
1982 int[] region = it.next();
1984 // get region coordinates accounting for gaps
1985 // we can rely on gaps not being *in* hidden regions because we already
1987 while (gapPosition < region[0])
1990 if (gaps.get(gapPosition))
1996 int left = region[0] - numGapsBefore;
1997 int right = region[1] - numGapsBefore;
1999 newhidden.hideColumns(left, right);
2000 padGaps(left, right, profileseq);
2006 * Pad gaps in all sequences in alignment except profileseq
2009 * position of first gap to insert
2011 * position of last gap to insert
2013 * sequence not to pad
2015 private void padGaps(int left, int right, SequenceI profileseq)
2017 char gc = getGapCharacter();
2019 // make a string with number of gaps = length of hidden region
2020 StringBuilder sb = new StringBuilder();
2021 for (int g = 0; g < right - left + 1; g++)
2026 // loop over the sequences and pad with gaps where required
2027 for (int s = 0, ns = getHeight(); s < ns; s++)
2029 SequenceI sqobj = getSequenceAt(s);
2030 if ((sqobj != profileseq) && (sqobj.getLength() >= left))
2032 String sq = sqobj.getSequenceAsString();
2034 sq.substring(0, left) + sb.toString() + sq.substring(left));
2040 public Hashtable[] getComplementConsensusHash()
2042 return codonConsensus;
2046 public Conservation getConservation()
2048 return conservation;
2052 public Hashtable[] getRnaStructureConsensusHash()
2054 return rnaStructureConsensus;
2058 public ProfilesI getSequenceConsensusHash()
2064 public void setComplementConsensusHash(Hashtable[] hconsensus)
2066 codonConsensus = hconsensus;
2071 public void setConservation(Conservation cons)
2073 conservation = cons;
2078 public void setRnaStructureConsensusHash(Hashtable[] hStrucConsensus)
2080 rnaStructureConsensus = hStrucConsensus;
2085 public void setSequenceConsensusHash(ProfilesI hconsensus)
2087 consensus = hconsensus;