2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
25 import jalview.io.FastaFile;
26 import jalview.util.Comparison;
27 import jalview.util.LinkedIdentityHashSet;
28 import jalview.util.MessageManager;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.BitSet;
33 import java.util.Collections;
34 import java.util.Enumeration;
35 import java.util.HashSet;
36 import java.util.Hashtable;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.Vector;
44 * Data structure to hold and manipulate a multiple sequence alignment
50 public class Alignment implements AlignmentI
52 private Alignment dataset;
54 private List<SequenceI> sequences;
56 private SequenceI hmmConsensus;
58 protected List<SequenceGroup> groups;
60 protected char gapCharacter = '-';
62 private boolean nucleotide = true;
64 public boolean hasRNAStructure = false;
66 public AlignmentAnnotation[] annotations;
68 HiddenSequences hiddenSequences;
70 HiddenColumns hiddenCols;
72 public Hashtable alignmentProperties;
74 private List<AlignedCodonFrame> codonFrameList;
76 private void initAlignment(SequenceI[] seqs)
78 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
79 hiddenSequences = new HiddenSequences(this);
80 hiddenCols = new HiddenColumns();
81 codonFrameList = new ArrayList<>();
83 nucleotide = Comparison.isNucleotide(seqs);
85 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
87 for (int i = 0; i < seqs.length; i++)
89 sequences.add(seqs[i]);
95 * Make a 'copy' alignment - sequences have new copies of features and
96 * annotations, but share the original dataset sequences.
98 public Alignment(AlignmentI al)
100 SequenceI[] seqs = al.getSequencesArray();
101 for (int i = 0; i < seqs.length; i++)
103 seqs[i] = new Sequence(seqs[i]);
109 * Share the same dataset sequence mappings (if any).
111 if (dataset == null && al.getDataset() == null)
113 this.setCodonFrames(al.getCodonFrames());
118 * Make an alignment from an array of Sequences.
122 public Alignment(SequenceI[] seqs)
128 * Make a new alignment from an array of SeqCigars
133 public Alignment(SeqCigar[] alseqs)
135 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
136 gapCharacter, new HiddenColumns(), null);
141 * Make a new alignment from a CigarArray JBPNote - can only do this when
142 * compactAlignment does not contain hidden regions. JBPNote - must also check
143 * that compactAlignment resolves to a set of SeqCigars - or construct them
146 * @param compactAlignment
149 public static AlignmentI createAlignment(CigarArray compactAlignment)
151 throw new Error(MessageManager
152 .getString("error.alignment_cigararray_not_implemented"));
153 // this(compactAlignment.refCigars);
157 public List<SequenceI> getSequences()
163 public List<SequenceI> getSequences(
164 Map<SequenceI, SequenceCollectionI> hiddenReps)
166 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
172 public SequenceI[] getSequencesArray()
174 if (sequences == null)
178 synchronized (sequences)
180 return sequences.toArray(new SequenceI[sequences.size()]);
185 * Returns a map of lists of sequences keyed by sequence name.
190 public Map<String, List<SequenceI>> getSequencesByName()
192 return AlignmentUtils.getSequencesByName(this);
196 public SequenceI getSequenceAt(int i)
198 synchronized (sequences)
200 if (i > -1 && i < sequences.size())
202 return sequences.get(i);
210 public SequenceI getSequenceAtAbsoluteIndex(int i)
212 SequenceI seq = null;
213 if (getHiddenSequences().getSize() > 0)
215 seq = getHiddenSequences().getHiddenSequence(i);
218 // didn't find the sequence in the hidden sequences, get it from the
220 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
221 seq = getSequenceAt(index);
226 seq = getSequenceAt(i);
232 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
233 * this currently does not recalculate whether or not the alignment is
234 * nucleotide, so mixed alignments may have undefined behaviour.
239 public void addSequence(SequenceI snew)
244 // maintain dataset integrity
245 SequenceI dsseq = snew.getDatasetSequence();
248 // derive new sequence
249 SequenceI adding = snew.deriveSequence();
251 dsseq = snew.getDatasetSequence();
253 if (getDataset().findIndex(dsseq) == -1)
255 getDataset().addSequence(dsseq);
259 if (sequences == null)
261 initAlignment(new SequenceI[] { snew });
265 synchronized (sequences)
270 if (hiddenSequences != null)
272 hiddenSequences.adjustHeightSequenceAdded();
277 public SequenceI replaceSequenceAt(int i, SequenceI snew)
279 synchronized (sequences)
281 if (sequences.size() > i)
283 return sequences.set(i, snew);
289 hiddenSequences.adjustHeightSequenceAdded();
296 * Inserts a sequence at a point in the alignment.
299 * the index of the position the sequence is to be inserted in.
302 public void insertSequenceAt(int i, SequenceI snew)
304 synchronized (sequences)
306 if (sequences.size() > i)
308 sequences.add(i, snew);
315 hiddenSequences.adjustHeightSequenceAdded();
324 * @return DOCUMENT ME!
327 public List<SequenceGroup> getGroups()
333 public void finalize() throws Throwable
335 if (getDataset() != null)
337 getDataset().removeAlignmentRef();
345 * Defensively nulls out references in case this object is not garbage
348 void nullReferences()
354 hiddenSequences = null;
358 * decrement the alignmentRefs counter by one and null references if it goes
363 private void removeAlignmentRef() throws Throwable
365 if (--alignmentRefs == 0)
372 public void deleteSequence(SequenceI s)
374 synchronized (sequences)
376 deleteSequence(findIndex(s));
381 public void deleteSequence(int i)
383 synchronized (sequences)
385 if (i > -1 && i < getHeight())
388 hiddenSequences.adjustHeightSequenceDeleted(i);
394 public void deleteHiddenSequence(int i)
396 synchronized (sequences)
398 if (i > -1 && i < getHeight())
408 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
411 public SequenceGroup findGroup(SequenceI seq, int position)
413 synchronized (groups)
415 for (SequenceGroup sg : groups)
417 if (sg.getSequences(null).contains(seq))
419 if (position >= sg.getStartRes() && position <= sg.getEndRes())
433 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
436 public SequenceGroup[] findAllGroups(SequenceI s)
438 ArrayList<SequenceGroup> temp = new ArrayList<>();
440 synchronized (groups)
442 int gSize = groups.size();
443 for (int i = 0; i < gSize; i++)
445 SequenceGroup sg = groups.get(i);
446 if (sg == null || sg.getSequences() == null)
448 this.deleteGroup(sg);
453 if (sg.getSequences().contains(s))
459 SequenceGroup[] ret = new SequenceGroup[temp.size()];
460 return temp.toArray(ret);
465 public void addGroup(SequenceGroup sg)
467 synchronized (groups)
469 if (!groups.contains(sg))
471 if (hiddenSequences.getSize() > 0)
473 int i, iSize = sg.getSize();
474 for (i = 0; i < iSize; i++)
476 if (!sequences.contains(sg.getSequenceAt(i)))
478 sg.deleteSequence(sg.getSequenceAt(i), false);
484 if (sg.getSize() < 1)
489 sg.setContext(this, true);
496 * remove any annotation that references gp
499 * (if null, removes all group associated annotation)
501 private void removeAnnotationForGroup(SequenceGroup gp)
503 if (annotations == null || annotations.length == 0)
507 // remove annotation very quickly
508 AlignmentAnnotation[] t,
509 todelete = new AlignmentAnnotation[annotations.length],
510 tokeep = new AlignmentAnnotation[annotations.length];
514 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
516 if (annotations[i].groupRef != null)
518 todelete[p++] = annotations[i];
522 tokeep[k++] = annotations[i];
528 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
530 if (annotations[i].groupRef == gp)
532 todelete[p++] = annotations[i];
536 tokeep[k++] = annotations[i];
542 // clear out the group associated annotation.
543 for (i = 0; i < p; i++)
545 unhookAnnotation(todelete[i]);
548 t = new AlignmentAnnotation[k];
549 for (i = 0; i < k; i++)
558 public void deleteAllGroups()
560 synchronized (groups)
562 if (annotations != null)
564 removeAnnotationForGroup(null);
566 for (SequenceGroup sg : groups)
568 sg.setContext(null, false);
576 public void deleteGroup(SequenceGroup g)
578 synchronized (groups)
580 if (groups.contains(g))
582 removeAnnotationForGroup(g);
584 g.setContext(null, false);
591 public SequenceI findName(String name)
593 return findName(name, false);
599 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
602 public SequenceI findName(String token, boolean b)
604 return findName(null, token, b);
610 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
614 public SequenceI findName(SequenceI startAfter, String token, boolean b)
619 String sqname = null;
620 if (startAfter != null)
622 // try to find the sequence in the alignment
623 boolean matched = false;
624 while (i < sequences.size())
626 if (getSequenceAt(i++) == startAfter)
637 while (i < sequences.size())
639 sq = getSequenceAt(i);
640 sqname = sq.getName();
641 if (sqname.equals(token) // exact match
642 || (b && // allow imperfect matches - case varies
643 (sqname.equalsIgnoreCase(token))))
645 return getSequenceAt(i);
655 public SequenceI[] findSequenceMatch(String name)
657 Vector matches = new Vector();
660 while (i < sequences.size())
662 if (getSequenceAt(i).getName().equals(name))
664 matches.addElement(getSequenceAt(i));
669 SequenceI[] result = new SequenceI[matches.size()];
670 for (i = 0; i < result.length; i++)
672 result[i] = (SequenceI) matches.elementAt(i);
682 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
685 public int findIndex(SequenceI s)
689 while (i < sequences.size())
691 if (s == getSequenceAt(i))
706 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
709 public int findIndex(SearchResultsI results)
713 while (i < sequences.size())
715 if (results.involvesSequence(getSequenceAt(i)))
725 public int getHeight()
727 return sequences.size();
731 public int getAbsoluteHeight()
733 return sequences.size() + getHiddenSequences().getSize();
737 public int getWidth()
741 for (int i = 0; i < sequences.size(); i++)
743 if (getSequenceAt(i).getLength() > maxLength)
745 maxLength = getSequenceAt(i).getLength();
753 public int getWidth()
755 final Wrapper temp = new Wrapper();
757 forEachSequence(new Consumer<SequenceI>()
760 public void accept(SequenceI s)
762 if (s.getLength() > temp.inner)
764 temp.inner = s.getLength();
767 }, 0, sequences.size() - 1);
772 public static class Wrapper
784 public void setGapCharacter(char gc)
787 synchronized (sequences)
789 for (SequenceI seq : sequences)
791 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
792 .replace('-', gc).replace(' ', gc));
800 * @return DOCUMENT ME!
803 public char getGapCharacter()
811 * @see jalview.datamodel.AlignmentI#isAligned()
814 public boolean isAligned()
816 return isAligned(false);
822 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
825 public boolean isAligned(boolean includeHidden)
827 int width = getWidth();
828 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
830 includeHidden = true; // no hidden sequences to check against.
832 for (int i = 0; i < sequences.size(); i++)
834 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
836 if (getSequenceAt(i).getLength() != width)
847 public boolean isHidden(int alignmentIndex)
849 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
853 * Delete all annotations, including auto-calculated if the flag is set true.
854 * Returns true if at least one annotation was deleted, else false.
856 * @param includingAutoCalculated
860 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
862 boolean result = false;
863 for (AlignmentAnnotation alan : getAlignmentAnnotation())
865 if (!alan.autoCalculated || includingAutoCalculated)
867 deleteAnnotation(alan);
877 * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
878 * AlignmentAnnotation)
881 public boolean deleteAnnotation(AlignmentAnnotation aa)
883 return deleteAnnotation(aa, true);
887 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
891 if (annotations != null)
893 aSize = annotations.length;
901 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
903 boolean swap = false;
906 for (int i = 0; i < aSize; i++)
908 if (annotations[i] == aa)
913 if (tIndex < temp.length)
915 temp[tIndex++] = annotations[i];
924 unhookAnnotation(aa);
931 * remove any object references associated with this annotation
935 private void unhookAnnotation(AlignmentAnnotation aa)
937 if (aa.sequenceRef != null)
939 aa.sequenceRef.removeAlignmentAnnotation(aa);
941 if (aa.groupRef != null)
943 // probably need to do more here in the future (post 2.5.0)
951 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
952 * AlignmentAnnotation)
955 public void addAnnotation(AlignmentAnnotation aa)
957 addAnnotation(aa, -1);
963 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
964 * AlignmentAnnotation, int)
967 public void addAnnotation(AlignmentAnnotation aa, int pos)
969 if (aa.getRNAStruc() != null)
971 hasRNAStructure = true;
975 if (annotations != null)
977 aSize = annotations.length + 1;
980 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
982 if (pos == -1 || pos >= aSize)
984 temp[aSize - 1] = aa;
993 for (i = 0; i < (aSize - 1); i++, p++)
1001 temp[p] = annotations[i];
1010 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
1012 if (aa == null || annotations == null || annotations.length - 1 < index)
1017 int aSize = annotations.length;
1018 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
1022 for (int i = 0; i < aSize; i++)
1031 temp[i] = annotations[i];
1035 temp[i] = annotations[i - 1];
1044 * returns all annotation on the alignment
1046 public AlignmentAnnotation[] getAlignmentAnnotation()
1052 public boolean isNucleotide()
1058 public boolean hasRNAStructure()
1060 // TODO can it happen that structure is removed from alignment?
1061 return hasRNAStructure;
1065 public void setDataset(AlignmentI data)
1067 if (dataset == null && data == null)
1069 createDatasetAlignment();
1071 else if (dataset == null && data != null)
1075 throw new IllegalArgumentException("Circular dataset reference");
1077 if (!(data instanceof Alignment))
1080 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1082 dataset = (Alignment) data;
1083 for (int i = 0; i < getHeight(); i++)
1085 SequenceI currentSeq = getSequenceAt(i);
1086 SequenceI dsq = currentSeq.getDatasetSequence();
1089 dsq = currentSeq.createDatasetSequence();
1090 dataset.addSequence(dsq);
1094 while (dsq.getDatasetSequence() != null)
1096 dsq = dsq.getDatasetSequence();
1098 if (dataset.findIndex(dsq) == -1)
1100 dataset.addSequence(dsq);
1105 dataset.addAlignmentRef();
1109 * add dataset sequences to seq for currentSeq and any sequences it references
1111 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1112 Set<SequenceI> seqs, boolean createDatasetSequence)
1114 SequenceI alignedSeq = currentSeq;
1115 if (currentSeq.getDatasetSequence() != null)
1117 currentSeq = currentSeq.getDatasetSequence();
1121 if (createDatasetSequence)
1123 currentSeq = currentSeq.createDatasetSequence();
1127 List<SequenceI> toProcess = new ArrayList<>();
1128 toProcess.add(currentSeq);
1129 while (toProcess.size() > 0)
1132 SequenceI curDs = toProcess.remove(0);
1134 if (!seqs.add(curDs))
1138 // iterate over database references, making sure we add forward referenced
1140 if (curDs.getDBRefs() != null)
1142 for (DBRefEntry dbr : curDs.getDBRefs())
1144 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1146 if (dbr.getMap().getTo() == alignedSeq)
1149 * update mapping to be to the newly created dataset sequence
1151 dbr.getMap().setTo(currentSeq);
1153 if (dbr.getMap().getTo().getDatasetSequence() != null)
1155 throw new Error("Implementation error: Map.getTo() for dbref "
1156 + dbr + " from " + curDs.getName()
1157 + " is not a dataset sequence.");
1159 // we recurse to add all forward references to dataset sequences via
1161 toProcess.add(dbr.getMap().getTo());
1169 * Creates a new dataset for this alignment. Can only be done once - if
1170 * dataset is not null this will not be performed.
1172 public void createDatasetAlignment()
1174 if (dataset != null)
1178 // try to avoid using SequenceI.equals at this stage, it will be expensive
1179 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1181 for (int i = 0; i < getHeight(); i++)
1183 SequenceI currentSeq = getSequenceAt(i);
1184 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1187 // verify all mappings are in dataset
1188 for (AlignedCodonFrame cf : codonFrameList)
1190 for (SequenceToSequenceMapping ssm : cf.getMappings())
1192 if (!seqs.contains(ssm.getFromSeq()))
1194 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1196 if (!seqs.contains(ssm.getMapping().getTo()))
1198 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1202 // finally construct dataset
1203 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1204 // move mappings to the dataset alignment
1205 dataset.codonFrameList = this.codonFrameList;
1206 this.codonFrameList = null;
1210 * reference count for number of alignments referencing this one.
1212 int alignmentRefs = 0;
1215 * increase reference count to this alignment.
1217 private void addAlignmentRef()
1223 public Alignment getDataset()
1229 public boolean padGaps()
1231 boolean modified = false;
1233 // Remove excess gaps from the end of alignment
1237 for (int i = 0; i < sequences.size(); i++)
1239 current = getSequenceAt(i);
1240 for (int j = current.getLength(); j > maxLength; j--)
1243 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
1254 for (int i = 0; i < sequences.size(); i++)
1256 current = getSequenceAt(i);
1257 cLength = current.getLength();
1259 if (cLength < maxLength)
1261 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
1264 else if (current.getLength() > maxLength)
1266 current.deleteChars(maxLength, current.getLength());
1273 * Justify the sequences to the left or right by deleting and inserting gaps
1274 * before the initial residue or after the terminal residue
1277 * true if alignment padded to right, false to justify to left
1278 * @return true if alignment was changed
1281 public boolean justify(boolean right)
1283 boolean modified = false;
1285 // Remove excess gaps from the end of alignment
1287 int ends[] = new int[sequences.size() * 2];
1289 for (int i = 0; i < sequences.size(); i++)
1291 current = getSequenceAt(i);
1292 // This should really be a sequence method
1293 ends[i * 2] = current.findIndex(current.getStart());
1294 ends[i * 2 + 1] = current
1295 .findIndex(current.getStart() + current.getLength());
1296 boolean hitres = false;
1297 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1299 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1308 ends[i * 2 + 1] = j;
1309 if (j - ends[i * 2] > maxLength)
1311 maxLength = j - ends[i * 2];
1319 // now edit the flanking gaps to justify to either left or right
1320 int cLength, extent, diff;
1321 for (int i = 0; i < sequences.size(); i++)
1323 current = getSequenceAt(i);
1325 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1326 diff = maxLength - cLength; // number of gaps to indent
1327 extent = current.getLength();
1331 if (extent > ends[i * 2 + 1])
1333 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1336 if (ends[i * 2] > diff)
1338 current.deleteChars(0, ends[i * 2] - diff);
1343 if (ends[i * 2] < diff)
1345 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1353 if (ends[i * 2] > 0)
1355 current.deleteChars(0, ends[i * 2]);
1357 ends[i * 2 + 1] -= ends[i * 2];
1358 extent -= ends[i * 2];
1360 if (extent > maxLength)
1362 current.deleteChars(maxLength + 1, extent);
1367 if (extent < maxLength)
1369 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1379 public HiddenSequences getHiddenSequences()
1381 return hiddenSequences;
1385 public HiddenColumns getHiddenColumns()
1391 public CigarArray getCompactAlignment()
1393 synchronized (sequences)
1395 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1397 for (SequenceI seq : sequences)
1399 alseqs[i++] = new SeqCigar(seq);
1401 CigarArray cal = new CigarArray(alseqs);
1402 cal.addOperation(CigarArray.M, getWidth());
1408 public void setProperty(Object key, Object value)
1410 if (alignmentProperties == null)
1412 alignmentProperties = new Hashtable();
1415 alignmentProperties.put(key, value);
1419 public Object getProperty(Object key)
1421 if (alignmentProperties != null)
1423 return alignmentProperties.get(key);
1432 public Hashtable getProperties()
1434 return alignmentProperties;
1438 * Adds the given mapping to the stored set. Note this may be held on the
1439 * dataset alignment.
1442 public void addCodonFrame(AlignedCodonFrame codons)
1444 List<AlignedCodonFrame> acfs = getCodonFrames();
1445 if (codons != null && acfs != null && !acfs.contains(codons))
1455 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1458 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1464 List<AlignedCodonFrame> cframes = new ArrayList<>();
1465 for (AlignedCodonFrame acf : getCodonFrames())
1467 if (acf.involvesSequence(seq))
1476 * Sets the codon frame mappings (replacing any existing mappings). Note the
1477 * mappings are set on the dataset alignment instead if there is one.
1479 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1482 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1484 if (dataset != null)
1486 dataset.setCodonFrames(acfs);
1490 this.codonFrameList = acfs;
1495 * Returns the set of codon frame mappings. Any changes to the returned set
1496 * will affect the alignment. The mappings are held on (and read from) the
1497 * dataset alignment if there is one.
1499 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1502 public List<AlignedCodonFrame> getCodonFrames()
1504 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1505 // this behaviour is currently incorrect. method should return codon frames
1506 // for just the alignment,
1507 // selected from dataset
1508 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1512 * Removes the given mapping from the stored set. Note that the mappings are
1513 * held on the dataset alignment if there is one.
1516 public boolean removeCodonFrame(AlignedCodonFrame codons)
1518 List<AlignedCodonFrame> acfs = getCodonFrames();
1519 if (codons == null || acfs == null)
1523 return acfs.remove(codons);
1527 public void append(AlignmentI toappend)
1529 // TODO JAL-1270 needs test coverage
1530 // currently tested for use in jalview.gui.SequenceFetcher
1531 char oldc = toappend.getGapCharacter();
1532 boolean samegap = oldc == getGapCharacter();
1533 boolean hashidden = toappend.getHiddenSequences() != null
1534 && toappend.getHiddenSequences().hiddenSequences != null;
1535 // get all sequences including any hidden ones
1536 List<SequenceI> sqs = (hashidden)
1537 ? toappend.getHiddenSequences().getFullAlignment()
1539 : toappend.getSequences();
1542 // avoid self append deadlock by
1543 List<SequenceI> toappendsq = new ArrayList<>();
1546 for (SequenceI addedsq : sqs)
1550 addedsq.replace(oldc, gapCharacter);
1552 toappendsq.add(addedsq);
1555 for (SequenceI addedsq : toappendsq)
1557 addSequence(addedsq);
1560 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1561 for (int a = 0; alan != null && a < alan.length; a++)
1563 addAnnotation(alan[a]);
1567 getCodonFrames().addAll(toappend.getCodonFrames());
1569 List<SequenceGroup> sg = toappend.getGroups();
1572 for (SequenceGroup _sg : sg)
1577 if (toappend.getHiddenSequences() != null)
1579 HiddenSequences hs = toappend.getHiddenSequences();
1580 if (hiddenSequences == null)
1582 hiddenSequences = new HiddenSequences(this);
1584 if (hs.hiddenSequences != null)
1586 for (int s = 0; s < hs.hiddenSequences.length; s++)
1588 // hide the newly appended sequence in the alignment
1589 if (hs.hiddenSequences[s] != null)
1591 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1596 if (toappend.getProperties() != null)
1598 // we really can't do very much here - just try to concatenate strings
1599 // where property collisions occur.
1600 Enumeration key = toappend.getProperties().keys();
1601 while (key.hasMoreElements())
1603 Object k = key.nextElement();
1604 Object ourval = this.getProperty(k);
1605 Object toapprop = toappend.getProperty(k);
1608 if (ourval.getClass().equals(toapprop.getClass())
1609 && !ourval.equals(toapprop))
1611 if (ourval instanceof String)
1615 ((String) ourval) + "; " + ((String) toapprop));
1619 if (ourval instanceof Vector)
1622 Enumeration theirv = ((Vector) toapprop).elements();
1623 while (theirv.hasMoreElements())
1625 ((Vector) ourval).addElement(theirv);
1633 // just add new property directly
1634 setProperty(k, toapprop);
1642 public AlignmentAnnotation findOrCreateAnnotation(String name,
1643 String calcId, boolean autoCalc, SequenceI seqRef,
1644 SequenceGroup groupRef)
1646 if (annotations != null)
1648 for (AlignmentAnnotation annot : getAlignmentAnnotation())
1650 if (annot.autoCalculated == autoCalc && (name.equals(annot.label))
1651 && (calcId == null || annot.getCalcId().equals(calcId))
1652 && annot.sequenceRef == seqRef
1653 && annot.groupRef == groupRef)
1659 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1660 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1661 annot.hasText = false;
1664 annot.setCalcId(calcId);
1666 annot.autoCalculated = autoCalc;
1669 annot.setSequenceRef(seqRef);
1671 annot.groupRef = groupRef;
1672 addAnnotation(annot);
1678 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1680 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1681 if (alignmentAnnotation != null)
1683 return AlignmentAnnotation.findAnnotation(
1684 Arrays.asList(getAlignmentAnnotation()), calcId);
1686 return Arrays.asList(new AlignmentAnnotation[] {});
1690 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1691 String calcId, String label)
1693 return AlignmentAnnotation.findAnnotations(
1694 Arrays.asList(getAlignmentAnnotation()), seq, calcId, label);
1698 public void moveSelectedSequencesByOne(SequenceGroup sg,
1699 Map<SequenceI, SequenceCollectionI> map, boolean up)
1701 synchronized (sequences)
1706 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1708 SequenceI seq = sequences.get(i);
1709 if (!sg.getSequences(map).contains(seq))
1714 SequenceI temp = sequences.get(i - 1);
1715 if (sg.getSequences(null).contains(temp))
1720 sequences.set(i, temp);
1721 sequences.set(i - 1, seq);
1726 for (int i = sequences.size() - 2; i > -1; i--)
1728 SequenceI seq = sequences.get(i);
1729 if (!sg.getSequences(map).contains(seq))
1734 SequenceI temp = sequences.get(i + 1);
1735 if (sg.getSequences(map).contains(temp))
1740 sequences.set(i, temp);
1741 sequences.set(i + 1, seq);
1749 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1751 alignmentAnnotation.validateRangeAndDisplay();
1752 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1754 hasRNAStructure = true;
1758 private SequenceI seqrep = null;
1762 * @return the representative sequence for this group
1765 public SequenceI getSeqrep()
1771 * set the representative sequence for this group. Note - this affects the
1772 * interpretation of the Hidereps attribute.
1775 * the seqrep to set (null means no sequence representative)
1778 public void setSeqrep(SequenceI seqrep)
1780 this.seqrep = seqrep;
1785 * @return true if group has a sequence representative
1788 public boolean hasSeqrep()
1790 return seqrep != null;
1794 public int getEndRes()
1796 return getWidth() - 1;
1800 public int getStartRes()
1806 * In the case of AlignmentI - returns the dataset for the alignment, if set
1809 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
1812 public AnnotatedCollectionI getContext()
1818 * Align this alignment like the given (mapped) one.
1821 public int alignAs(AlignmentI al)
1824 * Currently retains unmapped gaps (in introns), regaps mapped regions
1827 return alignAs(al, false, true);
1831 * Align this alignment 'the same as' the given one. Mapped sequences only are
1832 * realigned. If both of the same type (nucleotide/protein) then align both
1833 * identically. If this is nucleotide and the other is protein, make 3 gaps
1834 * for each gap in the protein sequences. If this is protein and the other is
1835 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1836 * nucleotide bases. If this is protein and the other is nucleotide, gaps
1837 * protein to match the relative ordering of codons in the nucleotide.
1839 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1840 * regions are preserved. Gaps that connect introns to exons are treated
1841 * conservatively, i.e. only preserved if both intron and exon gaps are
1842 * preserved. TODO: check caveats below where the implementation fails
1845 * - must have same dataset, and sequences in al must have equivalent
1846 * dataset sequence and start/end bounds under given mapping
1847 * @param preserveMappedGaps
1848 * if true, gaps within and between mapped codons are preserved
1849 * @param preserveUnmappedGaps
1850 * if true, gaps within and between unmapped codons are preserved
1853 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1854 boolean preserveUnmappedGaps)
1856 // TODO should this method signature be the one in the interface?
1857 // JBPComment - yes - neither flag is used, so should be deleted.
1858 boolean thisIsNucleotide = this.isNucleotide();
1859 boolean thatIsProtein = !al.isNucleotide();
1860 if (!thatIsProtein && !thisIsNucleotide)
1862 return AlignmentUtils.alignProteinAsDna(this, al);
1864 else if (thatIsProtein && thisIsNucleotide)
1866 return AlignmentUtils.alignCdsAsProtein(this, al);
1868 return AlignmentUtils.alignAs(this, al);
1872 * Returns the alignment in Fasta format. Behaviour of this method is not
1873 * guaranteed between versions.
1876 public String toString()
1878 return new FastaFile().print(getSequencesArray(), true);
1882 * Returns the set of distinct sequence names. No ordering is guaranteed.
1885 public Set<String> getSequenceNames()
1887 Set<String> names = new HashSet<>();
1888 for (SequenceI seq : getSequences())
1890 names.add(seq.getName());
1896 public boolean hasValidSequence()
1898 boolean hasValidSeq = false;
1899 for (SequenceI seq : getSequences())
1901 if ((seq.getEnd() - seq.getStart()) > 0)
1911 * Update any mappings to 'virtual' sequences to compatible real ones, if
1912 * present in the added sequences. Returns a count of mappings updated.
1918 public int realiseMappings(List<SequenceI> seqs)
1921 for (SequenceI seq : seqs)
1923 for (AlignedCodonFrame mapping : getCodonFrames())
1925 count += mapping.realiseWith(seq);
1932 * Returns the first AlignedCodonFrame that has a mapping between the given
1940 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1942 for (AlignedCodonFrame acf : getCodonFrames())
1944 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
1953 public void setHiddenColumns(HiddenColumns cols)
1959 public SequenceI getHmmConsensus()
1961 return hmmConsensus;
1965 public void setHmmConsensus(SequenceI hmmConsensus)
1967 this.hmmConsensus = hmmConsensus;
1971 public void setupJPredAlignment()
1973 SequenceI repseq = getSequenceAt(0);
1975 HiddenColumns cs = new HiddenColumns();
1976 cs.hideList(repseq.getInsertions());
1977 setHiddenColumns(cs);
1981 public HiddenColumns propagateInsertions(SequenceI profileseq,
1982 AlignmentView input)
1986 char gc = getGapCharacter();
1987 Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc);
1988 HiddenColumns nview = (HiddenColumns) alandhidden[1];
1989 SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos];
1990 return propagateInsertions(profileseq, origseq, nview);
1996 * sequence in al which corresponds to origseq
1998 * alignment which is to have gaps inserted into it
2000 * sequence corresponding to profileseq which defines gap map for
2003 private HiddenColumns propagateInsertions(SequenceI profileseq,
2004 SequenceI origseq, HiddenColumns hc)
2006 // take the set of hidden columns, and the set of gaps in origseq,
2007 // and remove all the hidden gaps from hiddenColumns
2009 // first get the gaps as a Bitset
2010 // then calculate hidden ^ not(gap)
2011 BitSet gaps = origseq.gapBitset();
2014 // for each sequence in the alignment, except the profile sequence,
2015 // insert gaps corresponding to each hidden region but where each hidden
2016 // column region is shifted backwards by the number of preceding visible
2017 // gaps update hidden columns at the same time
2018 HiddenColumns newhidden = new HiddenColumns();
2020 int numGapsBefore = 0;
2021 int gapPosition = 0;
2022 Iterator<int[]> it = hc.iterator();
2023 while (it.hasNext())
2025 int[] region = it.next();
2027 // get region coordinates accounting for gaps
2028 // we can rely on gaps not being *in* hidden regions because we already
2030 while (gapPosition < region[0])
2033 if (gaps.get(gapPosition))
2039 int left = region[0] - numGapsBefore;
2040 int right = region[1] - numGapsBefore;
2042 newhidden.hideColumns(left, right);
2043 padGaps(left, right, profileseq);
2049 * Pad gaps in all sequences in alignment except profileseq
2052 * position of first gap to insert
2054 * position of last gap to insert
2056 * sequence not to pad
2058 private void padGaps(int left, int right, SequenceI profileseq)
2060 char gc = getGapCharacter();
2062 // make a string with number of gaps = length of hidden region
2063 StringBuilder sb = new StringBuilder();
2064 for (int g = 0; g < right - left + 1; g++)
2069 // loop over the sequences and pad with gaps where required
2070 for (int s = 0, ns = getHeight(); s < ns; s++)
2072 SequenceI sqobj = getSequenceAt(s);
2073 if ((sqobj != profileseq) && (sqobj.getLength() >= left))
2075 String sq = sqobj.getSequenceAsString();
2077 sq.substring(0, left) + sb.toString() + sq.substring(left));