2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
25 import jalview.io.FastaFile;
26 import jalview.util.Comparison;
27 import jalview.util.LinkedIdentityHashSet;
28 import jalview.util.MessageManager;
30 import java.util.ArrayList;
31 import java.util.Collections;
32 import java.util.Enumeration;
33 import java.util.HashSet;
34 import java.util.Hashtable;
35 import java.util.List;
38 import java.util.Vector;
41 * Data structure to hold and manipulate a multiple sequence alignment
47 public class Alignment implements AlignmentI
// Dataset alignment, if any; the codon-frame accessors delegate to it when it is non-null.
49 private Alignment dataset;
// Visible sequences; created as a synchronized list in initAlignment.
51 protected List<SequenceI> sequences;
// Sequence groups; created as a synchronized list in initAlignment.
53 protected List<SequenceGroup> groups;
// Gap symbol used when padding or justifying sequences.
55 protected char gapCharacter = '-';
// Overwritten from Comparison.isNucleotide(seqs) in initAlignment.
57 private boolean nucleotide = true;
// Set to true when an annotation carrying RNA structure is added or validated.
59 public boolean hasRNAStructure = false;
// Annotation rows; methods in this class null-check it before use.
61 public AlignmentAnnotation[] annotations;
// Hidden-sequence bookkeeping; notified when sequences are added or deleted.
63 HiddenSequences hiddenSequences;
// Hidden-column model; a fresh instance is created in initAlignment.
65 HiddenColumns hiddenCols;
// Created lazily by setProperty; null until the first property is stored.
67 public Hashtable alignmentProperties;
// Codon-frame mappings held locally; handed over to the dataset by createDatasetAlignment.
69 private List<AlignedCodonFrame> codonFrameList;
71 private void initAlignment(SequenceI[] seqs)
73 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
74 hiddenSequences = new HiddenSequences(this);
75 hiddenCols = new HiddenColumns();
76 codonFrameList = new ArrayList<>();
78 nucleotide = Comparison.isNucleotide(seqs);
80 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
82 for (int i = 0; i < seqs.length; i++)
84 sequences.add(seqs[i]);
90 * Make a 'copy' alignment - sequences have new copies of features and
91 * annotations, but share the original dataset sequences.
// Copy constructor: every sequence taken from al is wrapped in a new Sequence,
// so this alignment does not hold al's sequence objects directly.
93 public Alignment(AlignmentI al)
95 SequenceI[] seqs = al.getSequencesArray();
96 for (int i = 0; i < seqs.length; i++)
98 seqs[i] = new Sequence(seqs[i]);
// NOTE(review): the statement that installs seqs into this alignment is not
// visible in this extract - presumably initAlignment(seqs); confirm.
104 * Share the same dataset sequence mappings (if any).
// The codon-frame list is shared by reference, and only when neither alignment has a dataset.
106 if (dataset == null && al.getDataset() == null)
108 this.setCodonFrames(al.getCodonFrames());
113 * Make an alignment from an array of Sequences.
// NOTE(review): the constructor body is not visible in this extract;
// presumably it delegates to initAlignment(seqs) - confirm.
117 public Alignment(SequenceI[] seqs)
123 * Make a new alignment from an array of SeqCigars
// Builds the sequences from the cigar strings, using this alignment's gap
// character, a fresh HiddenColumns instance and null as the last argument.
128 public Alignment(SeqCigar[] alseqs)
130 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
131 gapCharacter, new HiddenColumns(), null);
// NOTE(review): the statement that installs seqs is not visible in this
// extract - presumably initAlignment(seqs); confirm.
136 * Make a new alignment from an CigarArray JBPNote - can only do this when
137 * compactAlignment does not contain hidden regions. JBPNote - must also check
138 * that compactAlignment resolves to a set of SeqCigars - or construct them
141 * @param compactAlignment
144 public static AlignmentI createAlignment(CigarArray compactAlignment)
146 throw new Error(MessageManager
147 .getString("error.alignment_cigararray_not_implemented"));
148 // this(compactAlignment.refCigars);
// NOTE(review): body not visible in this extract; presumably returns the
// live sequences list rather than a copy - confirm before mutating the result.
152 public List<SequenceI> getSequences()
// Variant taking a hidden-representatives map; per the TODO below the map is
// not currently used. NOTE(review): the return statement is not visible here.
158 public List<SequenceI> getSequences(
159 Map<SequenceI, SequenceCollectionI> hiddenReps)
161 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
167 public SequenceI[] getSequencesArray()
169 if (sequences == null)
173 synchronized (sequences)
175 return sequences.toArray(new SequenceI[sequences.size()]);
180 * Returns a map of lists of sequences keyed by sequence name.
185 public Map<String, List<SequenceI>> getSequencesByName()
187 return AlignmentUtils.getSequencesByName(this);
191 public SequenceI getSequenceAt(int i)
193 synchronized (sequences)
195 if (i > -1 && i < sequences.size())
197 return sequences.get(i);
// Looks a sequence up by an index that also counts hidden sequences.
204 public SequenceI getSequenceAtAbsoluteIndex(int i)
206 SequenceI seq = null;
// with hidden sequences present, first ask the hidden-sequence store for index i
207 if (getHiddenSequences().getSize() > 0)
209 seq = getHiddenSequences().getHiddenSequence(i);
212 // didn't find the sequence in the hidden sequences, get it from the
// translate the absolute index into an index among the visible sequences
214 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
215 seq = getSequenceAt(index);
// nothing hidden: absolute and visible indices coincide
220 seq = getSequenceAt(i);
226 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
227 * this currently does not recalculate whether or not the alignment is
228 * nucleotide, so mixed alignments may have undefined behaviour.
// Adds snew to this alignment, keeping the dataset alignment (if any) in step.
233 public void addSequence(SequenceI snew)
238 // maintain dataset integrity
239 SequenceI dsseq = snew.getDatasetSequence();
242 // derive new sequence
243 SequenceI adding = snew.deriveSequence();
245 dsseq = snew.getDatasetSequence();
// register the dataset sequence with the dataset alignment if it is not already held there
247 if (getDataset().findIndex(dsseq) == -1)
249 getDataset().addSequence(dsseq);
// first sequence ever added: build the backing collections around it
253 if (sequences == null)
255 initAlignment(new SequenceI[] { snew });
// NOTE(review): the statement inside this synchronized block is not visible in
// this extract - presumably it appends snew to sequences; confirm.
259 synchronized (sequences)
// let the hidden-sequence bookkeeping account for the extra row
264 if (hiddenSequences != null)
266 hiddenSequences.adjustHeightSequenceAdded();
// Replaces the sequence at index i with snew; when i is inside the list the
// value returned is whatever List.set returns (the element previously at i).
271 public SequenceI replaceSequenceAt(int i, SequenceI snew)
273 synchronized (sequences)
275 if (sequences.size() > i)
277 return sequences.set(i, snew);
// NOTE(review): the branch taken when i is past the end is only partly visible
// here; it tells hiddenSequences that a sequence was added - confirm the rest.
283 hiddenSequences.adjustHeightSequenceAdded();
290 * Inserts a sequence at a point in the alignment.
293 * the index of the position the sequence is to be inserted in.
// Inserts snew at index i, shifting later sequences down (List.add(int, E)).
296 public void insertSequenceAt(int i, SequenceI snew)
298 synchronized (sequences)
// only a position strictly inside the current list is handled by this branch
300 if (sequences.size() > i)
302 sequences.add(i, snew);
// NOTE(review): the handling of i >= size is not visible in this extract - confirm.
309 hiddenSequences.adjustHeightSequenceAdded();
318 * @return DOCUMENT ME!
// NOTE(review): body not visible in this extract; presumably returns the
// groups list - confirm whether callers receive the live list.
321 public List<SequenceGroup> getGroups()
// Drops this alignment's reference on its dataset when the object is finalized.
// NOTE(review): Object.finalize() is deprecated since Java 9; consider an
// explicit release call instead. Any further statements are not visible here.
327 public void finalize() throws Throwable
329 if (getDataset() != null)
331 getDataset().removeAlignmentRef();
339 * Defensively nulls out references in case this object is not garbage
// Clears object references held by this alignment.
// NOTE(review): only the hiddenSequences assignment is visible in this
// extract; other fields are presumably nulled too - confirm.
342 void nullReferences()
348 hiddenSequences = null;
352 * decrement the alignmentRefs counter by one and null references if it goes
// Decrements the reference counter; the guarded action runs when it reaches zero.
// NOTE(review): the guarded statement is not visible here - presumably
// nullReferences(); confirm.
357 private void removeAlignmentRef() throws Throwable
359 if (--alignmentRefs == 0)
366 public void deleteSequence(SequenceI s)
368 synchronized (sequences)
370 deleteSequence(findIndex(s));
// Deletes the sequence at index i and updates the hidden-sequence bookkeeping.
375 public void deleteSequence(int i)
377 synchronized (sequences)
// indices outside the visible alignment are ignored
379 if (i > -1 && i < getHeight())
// NOTE(review): the removal from sequences itself is not visible in this extract - confirm.
382 hiddenSequences.adjustHeightSequenceDeleted(i);
// Variant of deleteSequence(int); no call to hiddenSequences is visible here.
// NOTE(review): the guarded statement is not visible in this extract - confirm.
388 public void deleteHiddenSequence(int i)
390 synchronized (sequences)
// indices outside the visible alignment are ignored
392 if (i > -1 && i < getHeight())
402 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
405 public SequenceGroup findGroup(SequenceI seq, int position)
407 synchronized (groups)
409 for (SequenceGroup sg : groups)
411 if (sg.getSequences(null).contains(seq))
413 if (position >= sg.getStartRes() && position <= sg.getEndRes())
427 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
// Collects every group that contains sequence s and returns them as an array.
430 public SequenceGroup[] findAllGroups(SequenceI s)
432 ArrayList<SequenceGroup> temp = new ArrayList<>();
434 synchronized (groups)
436 int gSize = groups.size();
437 for (int i = 0; i < gSize; i++)
439 SequenceGroup sg = groups.get(i);
// stale entries (null group or null member list) are deleted on the fly
// NOTE(review): this deletes from groups while indexing over it; the index
// and size adjustments are not visible in this extract - check that no
// element is skipped, and that deleteGroup tolerates a null argument.
440 if (sg == null || sg.getSequences() == null)
442 this.deleteGroup(sg);
447 if (sg.getSequences().contains(s))
453 SequenceGroup[] ret = new SequenceGroup[temp.size()];
454 return temp.toArray(ret);
// Adds sg to this alignment unless it is already present.
459 public void addGroup(SequenceGroup sg)
461 synchronized (groups)
463 if (!groups.contains(sg))
// while sequences are hidden, drop group members that are not in this
// alignment's visible sequence list
465 if (hiddenSequences.getSize() > 0)
467 int i, iSize = sg.getSize();
468 for (i = 0; i < iSize; i++)
470 if (!sequences.contains(sg.getSequenceAt(i)))
472 sg.deleteSequence(sg.getSequenceAt(i), false);
// NOTE(review): what happens to a group left empty is not visible here -
// presumably the method returns without adding it; confirm.
478 if (sg.getSize() < 1)
// make this alignment the group's context
483 sg.setContext(this, true);
490 * remove any annotation that references gp
493 * (if null, removes all group associated annotation)
// Splits annotations into rows to delete and rows to keep, unhooks the
// deleted rows, then builds a compacted array from the kept rows.
495 private void removeAnnotationForGroup(SequenceGroup gp)
497 if (annotations == null || annotations.length == 0)
501 // remove annotation very quickly
502 AlignmentAnnotation[] t,
503 todelete = new AlignmentAnnotation[annotations.length],
504 tokeep = new AlignmentAnnotation[annotations.length];
// first variant: every row that has any group reference is marked for deletion
// NOTE(review): the condition selecting between the two loops is not visible
// in this extract - presumably gp == null picks this one; confirm.
508 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
510 if (annotations[i].groupRef != null)
512 todelete[p++] = annotations[i];
516 tokeep[k++] = annotations[i];
// second variant: only rows whose group reference is exactly gp are marked
522 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
524 if (annotations[i].groupRef == gp)
526 todelete[p++] = annotations[i];
530 tokeep[k++] = annotations[i];
536 // clear out the group associated annotation.
537 for (i = 0; i < p; i++)
539 unhookAnnotation(todelete[i]);
// t is sized to the number of kept rows
// NOTE(review): the copy into t and the reassignment of annotations are not visible here.
542 t = new AlignmentAnnotation[k];
543 for (i = 0; i < k; i++)
// Removes all group-associated annotation and detaches every group from this alignment.
552 public void deleteAllGroups()
554 synchronized (groups)
555 
556 if (annotations != null)
// null is passed as the group argument
558 removeAnnotationForGroup(null);
560 for (SequenceGroup sg : groups)
562 sg.setContext(null, false);
// NOTE(review): the statement that empties the groups list is not visible in this extract.
// Deletes group g, together with the annotation that references it, if this alignment holds it.
570 public void deleteGroup(SequenceGroup g)
572 synchronized (groups)
574 if (groups.contains(g))
576 removeAnnotationForGroup(g);
// NOTE(review): the removal of g from groups is not visible in this extract - confirm.
// detach the group from this alignment
578 g.setContext(null, false);
585 public SequenceI findName(String name)
587 return findName(name, false);
593 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
596 public SequenceI findName(String token, boolean b)
598 return findName(null, token, b);
604 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
// Searches for a sequence named token, optionally resuming after a given sequence.
608 public SequenceI findName(SequenceI startAfter, String token, boolean b)
613 String sqname = null;
614 if (startAfter != null)
616 // try to find the sequence in the alignment
617 boolean matched = false;
// advance i until just past startAfter (identity comparison)
// NOTE(review): what happens when startAfter is not found is not visible in this extract.
618 while (i < sequences.size())
620 if (getSequenceAt(i++) == startAfter)
// main scan from the current position
631 while (i < sequences.size())
633 sq = getSequenceAt(i);
634 sqname = sq.getName();
635 if (sqname.equals(token) // exact match
636 || (b && // allow imperfect matches - case varies
637 (sqname.equalsIgnoreCase(token))))
639 return getSequenceAt(i);
649 public SequenceI[] findSequenceMatch(String name)
651 Vector matches = new Vector();
654 while (i < sequences.size())
656 if (getSequenceAt(i).getName().equals(name))
658 matches.addElement(getSequenceAt(i));
663 SequenceI[] result = new SequenceI[matches.size()];
664 for (i = 0; i < result.length; i++)
666 result[i] = (SequenceI) matches.elementAt(i);
676 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
679 public int findIndex(SequenceI s)
683 while (i < sequences.size())
685 if (s == getSequenceAt(i))
700 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
703 public int findIndex(SearchResultsI results)
707 while (i < sequences.size())
709 if (results.involvesSequence(getSequenceAt(i)))
719 public int getHeight()
721 return sequences.size();
725 public int getAbsoluteHeight()
727 return sequences.size() + getHiddenSequences().getSize();
// Computes the alignment width as the length of the longest sequence.
// NOTE(review): the initial value of maxLength (and so the result for an
// empty alignment) is not visible in this extract - confirm.
731 public int getWidth()
735 for (int i = 0; i < sequences.size(); i++)
737 if (getSequenceAt(i).getLength() > maxLength)
739 maxLength = getSequenceAt(i).getLength();
// Rewrites every sequence so that each '.', '-' and ' ' becomes gc.
// NOTE(review): the assignment to the gapCharacter field is not visible in
// this extract - presumably it precedes the loop; confirm.
753 public void setGapCharacter(char gc)
756 synchronized (sequences)
758 for (SequenceI seq : sequences)
760 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
761 .replace('-', gc).replace(' ', gc));
769 * @return DOCUMENT ME!
// NOTE(review): body not visible in this extract; presumably returns the gapCharacter field - confirm.
772 public char getGapCharacter()
780 * @see jalview.datamodel.AlignmentI#isAligned()
783 public boolean isAligned()
785 return isAligned(false);
791 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
794 public boolean isAligned(boolean includeHidden)
796 int width = getWidth();
797 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
799 includeHidden = true; // no hidden sequences to check against.
801 for (int i = 0; i < sequences.size(); i++)
803 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
805 if (getSequenceAt(i).getLength() != width)
816 public boolean isHidden(int alignmentIndex)
818 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
822 * Delete all annotations, including auto-calculated if the flag is set true.
823 * Returns true if at least one annotation was deleted, else false.
825 * @param includingAutoCalculated
829 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
831 boolean result = false;
832 for (AlignmentAnnotation alan : getAlignmentAnnotation())
834 if (!alan.autoCalculated || includingAutoCalculated)
836 deleteAnnotation(alan);
846 * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
847 * AlignmentAnnotation)
850 public boolean deleteAnnotation(AlignmentAnnotation aa)
852 return deleteAnnotation(aa, true);
// Removes aa (matched by identity) by copying all other rows into a new
// array one element shorter.
856 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
860 if (annotations != null)
862 aSize = annotations.length;
// NOTE(review): the guard for an empty annotation set is not visible in this
// extract; without one, aSize - 1 would be negative here - confirm.
870 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
872 boolean swap = false;
875 for (int i = 0; i < aSize; i++)
877 if (annotations[i] == aa)
// bounds check keeps the copy safe when aa is not found (temp is one slot short)
882 if (tIndex < temp.length)
884 temp[tIndex++] = annotations[i];
// NOTE(review): presumably only reached when aa was found and unhook is true - confirm.
893 unhookAnnotation(aa);
900 * remove any object references associated with this annotation
// Detaches aa from the sequence it references, if any.
904 private void unhookAnnotation(AlignmentAnnotation aa)
906 if (aa.sequenceRef != null)
908 aa.sequenceRef.removeAlignmentAnnotation(aa);
// NOTE(review): the statement guarded by this check is not visible in this
// extract - presumably it clears aa.groupRef; confirm.
910 if (aa.groupRef != null)
912 // probably need to do more here in the future (post 2.5.0)
920 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
921 * AlignmentAnnotation)
924 public void addAnnotation(AlignmentAnnotation aa)
926 addAnnotation(aa, -1);
932 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
933 * AlignmentAnnotation, int)
// Adds aa to the annotation array, which is rebuilt one element larger.
936 public void addAnnotation(AlignmentAnnotation aa, int pos)
// remember that the alignment now carries RNA structure
938 if (aa.getRNAStruc() != null)
940 hasRNAStructure = true;
944 if (annotations != null)
946 aSize = annotations.length + 1;
949 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
// -1 or an out-of-range position means "append at the end"
951 if (pos == -1 || pos >= aSize)
953 temp[aSize - 1] = aa;
// i walks the old array, p the new one
// NOTE(review): the step that places aa at pos and advances p is not visible in this extract.
962 for (i = 0; i < (aSize - 1); i++, p++)
970 temp[p] = annotations[i];
// Repositions aa within the annotation array, rebuilding it at the same size.
979 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
// nothing to do for a null annotation, no annotations, or an index past the last row
981 if (aa == null || annotations == null || annotations.length - 1 < index)
986 int aSize = annotations.length;
987 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
// NOTE(review): the conditions choosing between the two copies below are not
// visible in this extract; one copies in place, the other from the previous slot.
991 for (int i = 0; i < aSize; i++)
1000 temp[i] = annotations[i];
1004 temp[i] = annotations[i - 1];
1013 * returns all annotation on the alignment
// NOTE(review): body not visible in this extract; presumably returns the
// annotations field, which other methods here null-check - confirm.
1015 public AlignmentAnnotation[] getAlignmentAnnotation()
// NOTE(review): body not visible in this extract; presumably returns the nucleotide field - confirm.
1021 public boolean isNucleotide()
1027 public boolean hasRNAStructure()
1029 // TODO can it happen that structure is removed from alignment?
1030 return hasRNAStructure;
// Attaches a dataset alignment: creates one when none is supplied, otherwise
// adopts the supplied one and registers this alignment's dataset sequences in it.
1034 public void setDataset(AlignmentI data)
// no dataset on either side: build a fresh one from this alignment
1036 if (dataset == null && data == null)
1038 createDatasetAlignment();
1040 else if (dataset == null && data != null)
// NOTE(review): the condition guarding this throw is not visible in this
// extract - presumably data == this; confirm.
1044 throw new IllegalArgumentException("Circular dataset reference");
1046 if (!(data instanceof Alignment))
1049 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1051 dataset = (Alignment) data;
1052 for (int i = 0; i < getHeight(); i++)
1054 SequenceI currentSeq = getSequenceAt(i);
1055 SequenceI dsq = currentSeq.getDatasetSequence();
// NOTE(review): presumably reached when dsq is null - create and register a dataset sequence.
1058 dsq = currentSeq.createDatasetSequence();
1059 dataset.addSequence(dsq);
// follow the chain of dataset sequences to its root
1063 while (dsq.getDatasetSequence() != null)
1065 dsq = dsq.getDatasetSequence();
// add the root to the dataset only if it is not already there
1067 if (dataset.findIndex(dsq) == -1)
1069 dataset.addSequence(dsq);
// count this alignment as a user of the dataset
1074 dataset.addAlignmentRef();
1078 * add dataset sequences to seq for currentSeq and any sequences it references
// Adds the dataset sequence for currentSeq to seqs, then follows database
// reference mappings breadth-first so that every mapped-to sequence is added too.
1080 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1081 Set<SequenceI> seqs, boolean createDatasetSequence)
// remember the aligned sequence so mappings pointing at it can be redirected
1083 SequenceI alignedSeq = currentSeq;
1084 if (currentSeq.getDatasetSequence() != null)
1086 currentSeq = currentSeq.getDatasetSequence();
1090 if (createDatasetSequence)
1092 currentSeq = currentSeq.createDatasetSequence();
// work queue of dataset sequences still to be examined
1096 List<SequenceI> toProcess = new ArrayList<>();
1097 toProcess.add(currentSeq);
1098 while (toProcess.size() > 0)
1101 SequenceI curDs = toProcess.remove(0);
// Set.add returns false for a sequence already in seqs
// NOTE(review): the statement guarded by this check is not visible in this
// extract - presumably continue; confirm.
1103 if (!seqs.add(curDs))
1107 // iterate over database references, making sure we add forward referenced
1109 if (curDs.getDBRefs() != null)
1111 for (DBRefEntry dbr : curDs.getDBRefs())
1113 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1115 if (dbr.getMap().getTo() == alignedSeq)
1118 * update mapping to be to the newly created dataset sequence
1120 dbr.getMap().setTo(currentSeq);
// a mapping target that itself has a dataset sequence is treated as an implementation error
1122 if (dbr.getMap().getTo().getDatasetSequence() != null)
1124 throw new Error("Implementation error: Map.getTo() for dbref "
1125 + dbr + " from " + curDs.getName()
1126 + " is not a dataset sequence.");
1128 // we recurse to add all forward references to dataset sequences via
1130 toProcess.add(dbr.getMap().getTo());
1138 * Creates a new dataset for this alignment. Can only be done once - if
1139 * dataset is not null this will not be performed.
// Builds the dataset alignment from this alignment's sequences and the
// sequences referenced by its codon-frame mappings.
1141 public void createDatasetAlignment()
// NOTE(review): the statement guarded by this check is not visible in this
// extract - presumably return; confirm.
1143 if (dataset != null)
1147 // try to avoid using SequenceI.equals at this stage, it will be expensive
1148 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1150 for (int i = 0; i < getHeight(); i++)
1152 SequenceI currentSeq = getSequenceAt(i);
// true: create a dataset sequence where the aligned sequence has none
1153 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1156 // verify all mappings are in dataset
1157 for (AlignedCodonFrame cf : codonFrameList)
1159 for (SequenceToSequenceMapping ssm : cf.getMappings())
// false: mapping endpoints are added without creating dataset sequences
1161 if (!seqs.contains(ssm.getFromSeq()))
1163 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1165 if (!seqs.contains(ssm.getMapping().getTo()))
1167 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1171 // finally construct dataset
1172 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1173 // move mappings to the dataset alignment
1174 dataset.codonFrameList = this.codonFrameList;
// from here on getCodonFrames()/setCodonFrames() delegate to the dataset
1175 this.codonFrameList = null;
1179 * reference count for number of alignments referencing this one.
// Decremented in removeAlignmentRef; the cleanup there triggers when it reaches zero.
1181 int alignmentRefs = 0;
1184 * increase reference count to this alignment.
// NOTE(review): body not visible in this extract; presumably increments alignmentRefs - confirm.
1186 private void addAlignmentRef()
// NOTE(review): body not visible in this extract; presumably returns the
// dataset field, which callers here null-check - confirm.
1192 public Alignment getDataset()
// Makes all sequences the same length: finds the extent of the last non-gap
// residue, then pads shorter sequences with gaps and trims longer ones.
1198 public boolean padGaps()
1200 boolean modified = false;
1202 // Remove excess gaps from the end of alignment
// pass 1: scan each sequence backwards from its end for a non-gap character
// NOTE(review): the first half of the condition on the line after the inner
// loop header, and the update of maxLength, are not visible in this extract.
1206 for (int i = 0; i < sequences.size(); i++)
1208 current = getSequenceAt(i);
1209 for (int j = current.getLength(); j > maxLength; j--)
1212 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
// pass 2: bring every sequence to exactly maxLength
1223 for (int i = 0; i < sequences.size(); i++)
1225 current = getSequenceAt(i);
1226 cLength = current.getLength();
// too short: append gap characters
1228 if (cLength < maxLength)
1230 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
// too long: delete everything from maxLength onwards
1233 else if (current.getLength() > maxLength)
1235 current.deleteChars(maxLength, current.getLength());
1242 * Justify the sequences to the left or right by deleting and inserting gaps
1243 * before the initial residue or after the terminal residue
1246 * true if alignment padded to right, false to justify to left
1247 * @return true if alignment was changed
// Left- or right-justifies every sequence by editing its flanking gaps.
1250 public boolean justify(boolean right)
1252 boolean modified = false;
1254 // Remove excess gaps from the end of alignment
// ends[2*i] / ends[2*i+1] hold the first / last residue column of sequence i
1256 int ends[] = new int[sequences.size() * 2];
1258 for (int i = 0; i < sequences.size(); i++)
1260 current = getSequenceAt(i);
1261 // This should really be a sequence method
1262 ends[i * 2] = current.findIndex(current.getStart());
1263 ends[i * 2 + 1] = current
1264 .findIndex(current.getStart() + current.getLength());
// the scan below refines the bounds set above by inspecting each column
// NOTE(review): the handling of the first residue (hitres) is not visible in this extract.
1265 boolean hitres = false;
1266 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1268 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1277 ends[i * 2 + 1] = j;
// track the widest residue span seen so far
1278 if (j - ends[i * 2] > maxLength)
1280 maxLength = j - ends[i * 2];
1288 // now edit the flanking gaps to justify to either left or right
1289 int cLength, extent, diff;
1290 for (int i = 0; i < sequences.size(); i++)
1292 current = getSequenceAt(i);
1294 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1295 diff = maxLength - cLength; // number of gaps to indent
1296 extent = current.getLength();
// NOTE(review): the if/else on 'right' that separates the two strategies
// below is not visible in this extract; the first group trims trailing
// columns and adjusts leading gaps towards diff, the second strips leading
// gaps and then trims or pads the tail.
1300 if (extent > ends[i * 2 + 1])
1302 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1305 if (ends[i * 2] > diff)
1307 current.deleteChars(0, ends[i * 2] - diff);
1312 if (ends[i * 2] < diff)
1314 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1322 if (ends[i * 2] > 0)
1324 current.deleteChars(0, ends[i * 2]);
// shift the recorded end and extent to account for the removed leading columns
1326 ends[i * 2 + 1] -= ends[i * 2];
1327 extent -= ends[i * 2];
1329 if (extent > maxLength)
1331 current.deleteChars(maxLength + 1, extent);
1336 if (extent < maxLength)
1338 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1348 public HiddenSequences getHiddenSequences()
1350 return hiddenSequences;
// NOTE(review): body not visible in this extract; presumably returns the hiddenCols field - confirm.
1354 public HiddenColumns getHiddenColumns()
1360 public CigarArray getCompactAlignment()
1362 synchronized (sequences)
1364 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1366 for (SequenceI seq : sequences)
1368 alseqs[i++] = new SeqCigar(seq);
1370 CigarArray cal = new CigarArray(alseqs);
1371 cal.addOperation(CigarArray.M, getWidth());
1377 public void setProperty(Object key, Object value)
1379 if (alignmentProperties == null)
1381 alignmentProperties = new Hashtable();
1384 alignmentProperties.put(key, value);
1388 public Object getProperty(Object key)
1390 if (alignmentProperties != null)
1392 return alignmentProperties.get(key);
1401 public Hashtable getProperties()
1403 return alignmentProperties;
1407 * Adds the given mapping to the stored set. Note this may be held on the
1408 * dataset alignment.
1411 public void addCodonFrame(AlignedCodonFrame codons)
1413 List<AlignedCodonFrame> acfs = getCodonFrames();
1414 if (codons != null && acfs != null && !acfs.contains(codons))
1424 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
// Collects the codon-frame mappings that involve seq.
// NOTE(review): lines between the signature and the list creation are not
// visible in this extract (possibly a null check on seq), nor is the
// add/return - confirm.
1427 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1433 List<AlignedCodonFrame> cframes = new ArrayList<>();
1434 for (AlignedCodonFrame acf : getCodonFrames())
1436 if (acf.involvesSequence(seq))
1445 * Sets the codon frame mappings (replacing any existing mappings). Note the
1446 * mappings are set on the dataset alignment instead if there is one.
1448 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1451 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1453 if (dataset != null)
1455 dataset.setCodonFrames(acfs);
1459 this.codonFrameList = acfs;
1464 * Returns the set of codon frame mappings. Any changes to the returned set
1465 * will affect the alignment. The mappings are held on (and read from) the
1466 * dataset alignment if there is one.
1468 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1471 public List<AlignedCodonFrame> getCodonFrames()
1473 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1474 // this behaviour is currently incorrect. method should return codon frames
1475 // for just the alignment,
1476 // selected from dataset
1477 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1481 * Removes the given mapping from the stored set. Note that the mappings are
1482 * held on the dataset alignment if there is one.
1485 public boolean removeCodonFrame(AlignedCodonFrame codons)
1487 List<AlignedCodonFrame> acfs = getCodonFrames();
1488 if (codons == null || acfs == null)
1492 return acfs.remove(codons);
1496 public void append(AlignmentI toappend)
1498 // TODO JAL-1270 needs test coverage
1499 // currently tested for use in jalview.gui.SequenceFetcher
1500 boolean samegap = toappend.getGapCharacter() == getGapCharacter();
1501 char oldc = toappend.getGapCharacter();
1502 boolean hashidden = toappend.getHiddenSequences() != null
1503 && toappend.getHiddenSequences().hiddenSequences != null;
1504 // get all sequences including any hidden ones
1505 List<SequenceI> sqs = (hashidden)
1506 ? toappend.getHiddenSequences().getFullAlignment()
1508 : toappend.getSequences();
1511 // avoid self append deadlock by
1512 List<SequenceI> toappendsq = new ArrayList<>();
1515 for (SequenceI addedsq : sqs)
1519 char[] oldseq = addedsq.getSequence();
1520 for (int c = 0; c < oldseq.length; c++)
1522 if (oldseq[c] == oldc)
1524 oldseq[c] = gapCharacter;
1528 toappendsq.add(addedsq);
1531 for (SequenceI addedsq : toappendsq)
1533 addSequence(addedsq);
1536 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1537 for (int a = 0; alan != null && a < alan.length; a++)
1539 addAnnotation(alan[a]);
1543 getCodonFrames().addAll(toappend.getCodonFrames());
1545 List<SequenceGroup> sg = toappend.getGroups();
1548 for (SequenceGroup _sg : sg)
1553 if (toappend.getHiddenSequences() != null)
1555 HiddenSequences hs = toappend.getHiddenSequences();
1556 if (hiddenSequences == null)
1558 hiddenSequences = new HiddenSequences(this);
1560 if (hs.hiddenSequences != null)
1562 for (int s = 0; s < hs.hiddenSequences.length; s++)
1564 // hide the newly appended sequence in the alignment
1565 if (hs.hiddenSequences[s] != null)
1567 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1572 if (toappend.getProperties() != null)
1574 // we really can't do very much here - just try to concatenate strings
1575 // where property collisions occur.
1576 Enumeration key = toappend.getProperties().keys();
1577 while (key.hasMoreElements())
1579 Object k = key.nextElement();
1580 Object ourval = this.getProperty(k);
1581 Object toapprop = toappend.getProperty(k);
1584 if (ourval.getClass().equals(toapprop.getClass())
1585 && !ourval.equals(toapprop))
1587 if (ourval instanceof String)
1591 ((String) ourval) + "; " + ((String) toapprop));
1595 if (ourval instanceof Vector)
1598 Enumeration theirv = ((Vector) toapprop).elements();
1599 while (theirv.hasMoreElements())
1601 ((Vector) ourval).addElement(theirv);
1609 // just add new property directly
1610 setProperty(k, toapprop);
1618 public AlignmentAnnotation findOrCreateAnnotation(String name,
1619 String calcId, boolean autoCalc, SequenceI seqRef,
1620 SequenceGroup groupRef)
1622 if (annotations != null)
1624 for (AlignmentAnnotation annot : getAlignmentAnnotation())
1626 if (annot.autoCalculated == autoCalc && (name.equals(annot.label))
1627 && (calcId == null || annot.getCalcId().equals(calcId))
1628 && annot.sequenceRef == seqRef
1629 && annot.groupRef == groupRef)
1635 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1636 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1637 annot.hasText = false;
1638 annot.setCalcId(new String(calcId));
1639 annot.autoCalculated = autoCalc;
1642 annot.setSequenceRef(seqRef);
1644 annot.groupRef = groupRef;
1645 addAnnotation(annot);
1651 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1653 List<AlignmentAnnotation> aa = new ArrayList<>();
1654 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1655 if (alignmentAnnotation != null)
1657 for (AlignmentAnnotation a : alignmentAnnotation)
1659 if (a.getCalcId() == calcId || (a.getCalcId() != null
1660 && calcId != null && a.getCalcId().equals(calcId)))
1670 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1671 String calcId, String label)
1673 ArrayList<AlignmentAnnotation> aa = new ArrayList<>();
1674 for (AlignmentAnnotation ann : getAlignmentAnnotation())
1676 if ((calcId == null || (ann.getCalcId() != null
1677 && ann.getCalcId().equals(calcId)))
1678 && (seq == null || (ann.sequenceRef != null
1679 && ann.sequenceRef == seq))
1681 || (ann.label != null && ann.label.equals(label))))
// Moves every sequence of group sg one row up or down by swapping it with
// its neighbour, unless that neighbour also belongs to the group.
1690 public void moveSelectedSequencesByOne(SequenceGroup sg,
1691 Map<SequenceI, SequenceCollectionI> map, boolean up)
1693 synchronized (sequences)
// moving up: walk top to bottom so a moved sequence is not visited twice
1698 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1700 SequenceI seq = sequences.get(i);
1701 if (!sg.getSequences(map).contains(seq))
1706 SequenceI temp = sequences.get(i - 1);
// NOTE(review): this neighbour test passes null where the matching test in
// the downward loop passes map - confirm whether the difference is intended.
1707 if (sg.getSequences(null).contains(temp))
// swap the group member with the row above
1712 sequences.set(i, temp);
1713 sequences.set(i - 1, seq);
// moving down: walk bottom to top for the same reason
1718 for (int i = sequences.size() - 2; i > -1; i--)
1720 SequenceI seq = sequences.get(i);
1721 if (!sg.getSequences(map).contains(seq))
1726 SequenceI temp = sequences.get(i + 1);
1727 if (sg.getSequences(map).contains(temp))
// swap the group member with the row below
1732 sequences.set(i, temp);
1733 sequences.set(i + 1, seq);
1741 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1743 alignmentAnnotation.validateRangeAndDisplay();
1744 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1746 hasRNAStructure = true;
// Representative sequence; null means none is set (see hasSeqrep).
1750 private SequenceI seqrep = null;
1754 * @return the representative sequence for this group
// NOTE(review): body not visible in this extract; presumably returns the seqrep field - confirm.
1757 public SequenceI getSeqrep()
1763 * set the representative sequence for this group. Note - this affects the
1764 * interpretation of the Hidereps attribute.
1767 * the seqrep to set (null means no sequence representative)
1770 public void setSeqrep(SequenceI seqrep)
1772 this.seqrep = seqrep;
1777 * @return true if group has a sequence representative
1780 public boolean hasSeqrep()
1782 return seqrep != null;
1786 public int getEndRes()
1788 return getWidth() - 1;
// NOTE(review): body not visible in this extract; given getEndRes() returns
// getWidth() - 1, this presumably returns 0 - confirm.
1792 public int getStartRes()
1798 * In the case of AlignmentI - returns the dataset for the alignment, if set
1801 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
// NOTE(review): body not visible in this extract; the preceding Javadoc says
// it returns the dataset for the alignment, if set - confirm.
1804 public AnnotatedCollectionI getContext()
1810 * Align this alignment like the given (mapped) one.
1813 public int alignAs(AlignmentI al)
1816 * Currently retains unmapped gaps (in introns), regaps mapped regions
1819 return alignAs(al, false, true);
1823 * Align this alignment 'the same as' the given one. Mapped sequences only are
1824 * realigned. If both of the same type (nucleotide/protein) then align both
1825 * identically. If this is nucleotide and the other is protein, make 3 gaps
1826 * for each gap in the protein sequences. If this is protein and the other is
1827 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1828 * nucleotide bases. If this is protein and the other is nucleotide, gaps
1829 * protein to match the relative ordering of codons in the nucleotide.
1831 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1832 * regions are preserved. Gaps that connect introns to exons are treated
1833 * conservatively, i.e. only preserved if both intron and exon gaps are
1834 * preserved. TODO: check caveats below where the implementation fails
1837 * - must have same dataset, and sequences in al must have equivalent
1838 * dataset sequence and start/end bounds under given mapping
1839 * @param preserveMappedGaps
1840 * if true, gaps within and between mapped codons are preserved
1841 * @param preserveUnmappedGaps
1842 * if true, gaps within and between unmapped codons are preserved
1845 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1846 boolean preserveUnmappedGaps)
1848 // TODO should this method signature be the one in the interface?
1849 // JBPComment - yes - neither flag is used, so should be deleted.
1850 boolean thisIsNucleotide = this.isNucleotide();
1851 boolean thatIsProtein = !al.isNucleotide();
1852 if (!thatIsProtein && !thisIsNucleotide)
1854 return AlignmentUtils.alignProteinAsDna(this, al);
1856 else if (thatIsProtein && thisIsNucleotide)
1858 return AlignmentUtils.alignCdsAsProtein(this, al);
1860 return AlignmentUtils.alignAs(this, al);
1864 * Returns the alignment in Fasta format. Behaviour of this method is not
1865 * guaranteed between versions.
1868 public String toString()
1870 return new FastaFile().print(getSequencesArray(), true);
1874 * Returns the set of distinct sequence names. No ordering is guaranteed.
1877 public Set<String> getSequenceNames()
1879 Set<String> names = new HashSet<>();
1880 for (SequenceI seq : getSequences())
1882 names.add(seq.getName());
1888 public boolean hasValidSequence()
1890 boolean hasValidSeq = false;
1891 for (SequenceI seq : getSequences())
1893 if ((seq.getEnd() - seq.getStart()) > 0)
1903 * Update any mappings to 'virtual' sequences to compatible real ones, if
1904 * present in the added sequences. Returns a count of mappings updated.
1910 public int realiseMappings(List<SequenceI> seqs)
1913 for (SequenceI seq : seqs)
1915 for (AlignedCodonFrame mapping : getCodonFrames())
1917 count += mapping.realiseWith(seq);
1924 * Returns the first AlignedCodonFrame that has a mapping between the given
1932 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1934 for (AlignedCodonFrame acf : getCodonFrames())
1936 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
// NOTE(review): body not visible in this extract; presumably assigns cols to the hiddenCols field - confirm.
1945 public void setHiddenColumns(HiddenColumns cols)
1951 * Returns all HMM consensus sequences. This will not return real sequences
1952 * with HMMs. If remove is set to true, the consensus sequences will be
1953 * removed from the alignment.
1955 @Override // TODO make this more efficient.
1956 public List<SequenceI> getHMMConsensusSequences(boolean remove)
1958 List<SequenceI> seqs = new ArrayList<>();
1960 int seqsRemoved = 0;
1961 boolean endReached = false;
1965 SequenceI seq = sequences.get(position);
1966 if (seq.isHMMConsensusSequence())
1970 sequences.remove(position);
1972 seq.setPreviousPosition(seqsRemoved + position - 1);
1985 if (position >= sequences.size())