2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.BitSet;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.Enumeration;
29 import java.util.HashSet;
30 import java.util.Hashtable;
31 import java.util.Iterator;
32 import java.util.List;
35 import java.util.Vector;
37 import jalview.analysis.AlignmentUtils;
38 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
39 import jalview.io.FastaFile;
40 import jalview.util.Comparison;
41 import jalview.util.LinkedIdentityHashSet;
42 import jalview.util.MessageManager;
45 * Data structure to hold and manipulate a multiple sequence alignment
51 public class Alignment implements AlignmentI, AutoCloseable
53 private Alignment dataset;
55 private List<SequenceI> sequences;
57 protected List<SequenceGroup> groups;
59 protected char gapCharacter = '-';
61 private boolean nucleotide = true;
63 public boolean hasRNAStructure = false;
65 public AlignmentAnnotation[] annotations;
67 HiddenSequences hiddenSequences;
69 HiddenColumns hiddenCols;
71 public Hashtable alignmentProperties;
73 private List<AlignedCodonFrame> codonFrameList;
75 private void initAlignment(SequenceI[] seqs)
77 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
78 hiddenSequences = new HiddenSequences(this);
79 hiddenCols = new HiddenColumns();
80 codonFrameList = new ArrayList<>();
82 nucleotide = Comparison.isNucleotide(seqs);
84 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
86 for (int i = 0; i < seqs.length; i++)
88 sequences.add(seqs[i]);
94 * Make a 'copy' alignment - sequences have new copies of features and
95 * annotations, but share the original dataset sequences.
97 public Alignment(AlignmentI al)
99 SequenceI[] seqs = al.getSequencesArray();
100 for (int i = 0; i < seqs.length; i++)
102 seqs[i] = new Sequence(seqs[i]);
108 * Share the same dataset sequence mappings (if any).
110 if (dataset == null && al.getDataset() == null)
112 this.setCodonFrames(al.getCodonFrames());
117 * Make an alignment from an array of Sequences.
121 public Alignment(SequenceI[] seqs)
127 * Make a new alignment from an array of SeqCigars
132 public Alignment(SeqCigar[] alseqs)
134 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
135 gapCharacter, new HiddenColumns(), null);
140 * Make a new alignment from a CigarArray. JBPNote - can only do this when
141 * compactAlignment does not contain hidden regions. JBPNote - must also check
142 * that compactAlignment resolves to a set of SeqCigars - or construct them
145 * @param compactAlignment
148 public static AlignmentI createAlignment(CigarArray compactAlignment)
150 throw new Error(MessageManager
151 .getString("error.alignment_cigararray_not_implemented"));
152 // this(compactAlignment.refCigars);
156 public List<SequenceI> getSequences()
162 public List<SequenceI> getSequences(
163 Map<SequenceI, SequenceCollectionI> hiddenReps)
165 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
171 public SequenceI[] getSequencesArray()
173 if (sequences == null)
177 synchronized (sequences)
179 return sequences.toArray(new SequenceI[sequences.size()]);
184 * Returns a map of lists of sequences keyed by sequence name.
189 public Map<String, List<SequenceI>> getSequencesByName()
191 return AlignmentUtils.getSequencesByName(this);
195 public SequenceI getSequenceAt(int i)
197 synchronized (sequences)
200 if (i > -1 && i < sequences.size())
202 return sequences.get(i);
210 public SequenceI getSequenceAtAbsoluteIndex(int i)
212 SequenceI seq = null;
213 if (getHiddenSequences().getSize() > 0)
215 seq = getHiddenSequences().getHiddenSequence(i);
218 // didn't find the sequence in the hidden sequences, get it from the
220 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
221 seq = getSequenceAt(index);
226 seq = getSequenceAt(i);
232 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
233 * this currently does not recalculate whether or not the alignment is
234 * nucleotide, so mixed alignments may have undefined behaviour.
239 public void addSequence(SequenceI snew)
244 // maintain dataset integrity
245 SequenceI dsseq = snew.getDatasetSequence();
248 // derive new sequence
249 SequenceI adding = snew.deriveSequence();
251 dsseq = snew.getDatasetSequence();
253 if (getDataset().findIndex(dsseq) == -1)
255 getDataset().addSequence(dsseq);
259 if (sequences == null)
261 initAlignment(new SequenceI[] { snew });
265 synchronized (sequences)
270 if (hiddenSequences != null)
272 hiddenSequences.adjustHeightSequenceAdded();
277 public SequenceI replaceSequenceAt(int i, SequenceI snew)
279 synchronized (sequences)
281 if (sequences.size() > i)
283 return sequences.set(i, snew);
289 hiddenSequences.adjustHeightSequenceAdded();
298 * @return DOCUMENT ME!
301 public List<SequenceGroup> getGroups()
309 if (getDataset() != null)
313 getDataset().removeAlignmentRef();
314 } catch (Throwable e)
324 * Defensively nulls out references in case this object is not garbage
327 void nullReferences()
333 hiddenSequences = null;
337 * decrement the alignmentRefs counter by one and null references if it goes
342 private void removeAlignmentRef() throws Throwable
344 if (--alignmentRefs == 0)
351 public void deleteSequence(SequenceI s)
353 synchronized (sequences)
355 deleteSequence(findIndex(s));
360 public void deleteSequence(int i)
362 synchronized (sequences)
364 if (i > -1 && i < getHeight())
367 hiddenSequences.adjustHeightSequenceDeleted(i);
373 public void deleteHiddenSequence(int i)
375 synchronized (sequences)
377 if (i > -1 && i < getHeight())
387 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
390 public SequenceGroup findGroup(SequenceI seq, int position)
392 synchronized (groups)
394 for (SequenceGroup sg : groups)
396 if (sg.getSequences(null).contains(seq))
398 if (position >= sg.getStartRes() && position <= sg.getEndRes())
412 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
415 public SequenceGroup[] findAllGroups(SequenceI s)
417 ArrayList<SequenceGroup> temp = new ArrayList<>();
419 synchronized (groups)
421 int gSize = groups.size();
422 for (int i = 0; i < gSize; i++)
424 SequenceGroup sg = groups.get(i);
425 if (sg == null || sg.getSequences() == null)
427 this.deleteGroup(sg);
432 if (sg.getSequences().contains(s))
438 SequenceGroup[] ret = new SequenceGroup[temp.size()];
439 return temp.toArray(ret);
444 public void addGroup(SequenceGroup sg)
446 synchronized (groups)
448 if (!groups.contains(sg))
450 if (hiddenSequences.getSize() > 0)
452 int i, iSize = sg.getSize();
453 for (i = 0; i < iSize; i++)
455 if (!sequences.contains(sg.getSequenceAt(i)))
457 sg.deleteSequence(sg.getSequenceAt(i), false);
463 if (sg.getSize() < 1)
468 sg.setContext(this, true);
475 * remove any annotation that references gp
478 * (if null, removes all group associated annotation)
480 private void removeAnnotationForGroup(SequenceGroup gp)
482 if (annotations == null || annotations.length == 0)
486 // remove annotation very quickly
487 AlignmentAnnotation[] t,
488 todelete = new AlignmentAnnotation[annotations.length],
489 tokeep = new AlignmentAnnotation[annotations.length];
493 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
495 if (annotations[i].groupRef != null)
497 todelete[p++] = annotations[i];
501 tokeep[k++] = annotations[i];
507 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
509 if (annotations[i].groupRef == gp)
511 todelete[p++] = annotations[i];
515 tokeep[k++] = annotations[i];
521 // clear out the group associated annotation.
522 for (i = 0; i < p; i++)
524 unhookAnnotation(todelete[i]);
527 t = new AlignmentAnnotation[k];
528 for (i = 0; i < k; i++)
537 public void deleteAllGroups()
539 synchronized (groups)
541 if (annotations != null)
543 removeAnnotationForGroup(null);
545 for (SequenceGroup sg : groups)
547 sg.setContext(null, false);
555 public void deleteGroup(SequenceGroup g)
557 synchronized (groups)
559 if (groups.contains(g))
561 removeAnnotationForGroup(g);
563 g.setContext(null, false);
570 public SequenceI findName(String name)
572 return findName(name, false);
578 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
581 public SequenceI findName(String token, boolean b)
583 return findName(null, token, b);
589 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
593 public SequenceI findName(SequenceI startAfter, String token, boolean b)
598 String sqname = null;
599 int nseq = sequences.size();
600 if (startAfter != null)
602 // try to find the sequence in the alignment
603 boolean matched = false;
606 if (getSequenceAt(i++) == startAfter)
619 sq = getSequenceAt(i);
620 sqname = sq.getName();
621 if (sqname.equals(token) // exact match
622 || (b && // allow imperfect matches - case varies
623 (sqname.equalsIgnoreCase(token))))
625 return getSequenceAt(i);
635 public SequenceI[] findSequenceMatch(String name)
637 Vector matches = new Vector();
640 while (i < sequences.size())
642 if (getSequenceAt(i).getName().equals(name))
644 matches.addElement(getSequenceAt(i));
649 SequenceI[] result = new SequenceI[matches.size()];
650 for (i = 0; i < result.length; i++)
652 result[i] = (SequenceI) matches.elementAt(i);
662 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
665 public int findIndex(SequenceI s)
669 while (i < sequences.size())
671 if (s == getSequenceAt(i))
686 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
689 public int findIndex(SearchResultsI results)
693 while (i < sequences.size())
695 if (results.involvesSequence(getSequenceAt(i)))
705 public int getHeight()
707 return sequences.size();
711 public int getAbsoluteHeight()
713 return sequences.size() + getHiddenSequences().getSize();
717 public int getWidth()
721 for (int i = 0; i < sequences.size(); i++)
723 maxLength = Math.max(maxLength, getSequenceAt(i).getLength());
729 public int getVisibleWidth()
732 if (hiddenCols != null)
734 w -= hiddenCols.getSize();
746 public void setGapCharacter(char gc)
749 synchronized (sequences)
751 for (SequenceI seq : sequences)
753 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
754 .replace('-', gc).replace(' ', gc));
762 * @return DOCUMENT ME!
765 public char getGapCharacter()
773 * @see jalview.datamodel.AlignmentI#isAligned()
776 public boolean isAligned()
778 return isAligned(false);
784 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
787 public boolean isAligned(boolean includeHidden)
789 int width = getWidth();
790 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
792 includeHidden = true; // no hidden sequences to check against.
794 for (int i = 0; i < sequences.size(); i++)
796 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
798 if (getSequenceAt(i).getLength() != width)
809 public boolean isHidden(int alignmentIndex)
811 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
815 * Delete all annotations, including auto-calculated if the flag is set true.
816 * Returns true if at least one annotation was deleted, else false.
818 * @param includingAutoCalculated
822 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
824 boolean result = false;
825 for (AlignmentAnnotation alan : getAlignmentAnnotation())
827 if (!alan.autoCalculated || includingAutoCalculated)
829 deleteAnnotation(alan);
839 * @see jalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
840 * AlignmentAnnotation)
843 public boolean deleteAnnotation(AlignmentAnnotation aa)
845 return deleteAnnotation(aa, true);
849 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
853 if (annotations != null)
855 aSize = annotations.length;
863 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
865 boolean swap = false;
868 for (int i = 0; i < aSize; i++)
870 if (annotations[i] == aa)
875 if (tIndex < temp.length)
877 temp[tIndex++] = annotations[i];
886 unhookAnnotation(aa);
893 * remove any object references associated with this annotation
897 private void unhookAnnotation(AlignmentAnnotation aa)
899 if (aa.sequenceRef != null)
901 aa.sequenceRef.removeAlignmentAnnotation(aa);
903 if (aa.groupRef != null)
905 // probably need to do more here in the future (post 2.5.0)
913 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
914 * AlignmentAnnotation)
917 public void addAnnotation(AlignmentAnnotation aa)
919 addAnnotation(aa, -1);
925 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
926 * AlignmentAnnotation, int)
929 public void addAnnotation(AlignmentAnnotation aa, int pos)
931 if (aa.getRNAStruc() != null)
933 hasRNAStructure = true;
937 if (annotations != null)
939 aSize = annotations.length + 1;
942 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
944 if (pos == -1 || pos >= aSize)
946 temp[aSize - 1] = aa;
955 for (i = 0; i < (aSize - 1); i++, p++)
963 temp[p] = annotations[i];
972 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
974 if (aa == null || annotations == null || annotations.length - 1 < index)
979 int aSize = annotations.length;
980 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
984 for (int i = 0; i < aSize; i++)
993 temp[i] = annotations[i];
997 temp[i] = annotations[i - 1];
1006 * returns all annotation on the alignment
1008 public AlignmentAnnotation[] getAlignmentAnnotation()
1014 public boolean isNucleotide()
1020 public boolean hasRNAStructure()
1022 // TODO can it happen that structure is removed from alignment?
1023 return hasRNAStructure;
1027 public void setDataset(AlignmentI data)
1029 if (dataset == null && data == null)
1031 createDatasetAlignment();
1033 else if (dataset == null && data != null)
1037 throw new IllegalArgumentException("Circular dataset reference");
1039 if (!(data instanceof Alignment))
1042 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1044 dataset = (Alignment) data;
1045 for (int i = 0; i < getHeight(); i++)
1047 SequenceI currentSeq = getSequenceAt(i);
1048 SequenceI dsq = currentSeq.getDatasetSequence();
1051 dsq = currentSeq.createDatasetSequence();
1052 dataset.addSequence(dsq);
1056 while (dsq.getDatasetSequence() != null)
1058 dsq = dsq.getDatasetSequence();
1060 if (dataset.findIndex(dsq) == -1)
1062 dataset.addSequence(dsq);
1067 dataset.addAlignmentRef();
1071 * add dataset sequences to seq for currentSeq and any sequences it references
1073 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1074 Set<SequenceI> seqs, boolean createDatasetSequence)
1076 SequenceI alignedSeq = currentSeq;
1077 if (currentSeq.getDatasetSequence() != null)
1079 currentSeq = currentSeq.getDatasetSequence();
1083 if (createDatasetSequence)
1085 currentSeq = currentSeq.createDatasetSequence();
1089 List<SequenceI> toProcess = new ArrayList<>();
1090 toProcess.add(currentSeq);
1091 while (toProcess.size() > 0)
1094 SequenceI curDs = toProcess.remove(0);
1096 if (!seqs.add(curDs))
1100 // iterate over database references, making sure we add forward referenced
1102 if (curDs.getDBRefs() != null)
1104 for (DBRefEntry dbr : curDs.getDBRefs())
1106 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1108 if (dbr.getMap().getTo() == alignedSeq)
1111 * update mapping to be to the newly created dataset sequence
1113 dbr.getMap().setTo(currentSeq);
1115 if (dbr.getMap().getTo().getDatasetSequence() != null)
1117 throw new Error("Implementation error: Map.getTo() for dbref "
1118 + dbr + " from " + curDs.getName()
1119 + " is not a dataset sequence.");
1121 // we recurse to add all forward references to dataset sequences via
1123 toProcess.add(dbr.getMap().getTo());
1131 * Creates a new dataset for this alignment. Can only be done once - if
1132 * dataset is not null this will not be performed.
1134 public void createDatasetAlignment()
1136 if (dataset != null)
1140 // try to avoid using SequenceI.equals at this stage, it will be expensive
1141 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1143 for (int i = 0; i < getHeight(); i++)
1145 SequenceI currentSeq = getSequenceAt(i);
1146 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1149 // verify all mappings are in dataset
1150 for (AlignedCodonFrame cf : codonFrameList)
1152 for (SequenceToSequenceMapping ssm : cf.getMappings())
1154 if (!seqs.contains(ssm.getFromSeq()))
1156 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1158 if (!seqs.contains(ssm.getMapping().getTo()))
1160 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1164 // finally construct dataset
1165 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1166 // move mappings to the dataset alignment
1167 dataset.codonFrameList = this.codonFrameList;
1168 this.codonFrameList = null;
1172 * reference count for number of alignments referencing this one.
1174 int alignmentRefs = 0;
1177 * increase reference count to this alignment.
1179 private void addAlignmentRef()
1185 public Alignment getDataset()
1191 public boolean padGaps()
1193 boolean modified = false;
1195 // Remove excess gaps from the end of alignment
1199 int nseq = sequences.size();
1200 for (int i = 0; i < nseq; i++)
1202 current = getSequenceAt(i);
1203 for (int j = current.getLength(); j > maxLength; j--)
1206 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
1217 for (int i = 0; i < nseq; i++)
1219 current = getSequenceAt(i);
1220 cLength = current.getLength();
1222 if (cLength < maxLength)
1224 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
1227 else if (current.getLength() > maxLength)
1229 current.deleteChars(maxLength, current.getLength());
1236 * Justify the sequences to the left or right by deleting and inserting gaps
1237 * before the initial residue or after the terminal residue
1240 * true if alignment padded to right, false to justify to left
1241 * @return true if alignment was changed
1244 public boolean justify(boolean right)
1246 boolean modified = false;
1248 // Remove excess gaps from the end of alignment
1250 int ends[] = new int[sequences.size() * 2];
1252 for (int i = 0; i < sequences.size(); i++)
1254 current = getSequenceAt(i);
1255 // This should really be a sequence method
1256 ends[i * 2] = current.findIndex(current.getStart());
1257 ends[i * 2 + 1] = current
1258 .findIndex(current.getStart() + current.getLength());
1259 boolean hitres = false;
1260 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1262 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1271 ends[i * 2 + 1] = j;
1272 if (j - ends[i * 2] > maxLength)
1274 maxLength = j - ends[i * 2];
1282 // now edit the flanking gaps to justify to either left or right
1283 int cLength, extent, diff;
1284 for (int i = 0; i < sequences.size(); i++)
1286 current = getSequenceAt(i);
1288 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1289 diff = maxLength - cLength; // number of gaps to indent
1290 extent = current.getLength();
1294 if (extent > ends[i * 2 + 1])
1296 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1299 if (ends[i * 2] > diff)
1301 current.deleteChars(0, ends[i * 2] - diff);
1306 if (ends[i * 2] < diff)
1308 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1316 if (ends[i * 2] > 0)
1318 current.deleteChars(0, ends[i * 2]);
1320 ends[i * 2 + 1] -= ends[i * 2];
1321 extent -= ends[i * 2];
1323 if (extent > maxLength)
1325 current.deleteChars(maxLength + 1, extent);
1330 if (extent < maxLength)
1332 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1342 public HiddenSequences getHiddenSequences()
1344 return hiddenSequences;
1348 public HiddenColumns getHiddenColumns()
1354 public CigarArray getCompactAlignment()
1356 synchronized (sequences)
1358 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1360 for (SequenceI seq : sequences)
1362 alseqs[i++] = new SeqCigar(seq);
1364 CigarArray cal = new CigarArray(alseqs);
1365 cal.addOperation(CigarArray.M, getWidth());
1371 public void setProperty(Object key, Object value)
1373 if (alignmentProperties == null)
1375 alignmentProperties = new Hashtable();
1378 alignmentProperties.put(key, value);
1382 public Object getProperty(Object key)
1384 if (alignmentProperties != null)
1386 return alignmentProperties.get(key);
1395 public Hashtable getProperties()
1397 return alignmentProperties;
1401 * Adds the given mapping to the stored set. Note this may be held on the
1402 * dataset alignment.
1405 public void addCodonFrame(AlignedCodonFrame codons)
1407 List<AlignedCodonFrame> acfs = getCodonFrames();
1408 if (codons != null && acfs != null && !acfs.contains(codons))
1418 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1421 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1427 List<AlignedCodonFrame> cframes = new ArrayList<>();
1428 for (AlignedCodonFrame acf : getCodonFrames())
1430 if (acf.involvesSequence(seq))
1439 * Sets the codon frame mappings (replacing any existing mappings). Note the
1440 * mappings are set on the dataset alignment instead if there is one.
1442 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1445 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1447 if (dataset != null)
1449 dataset.setCodonFrames(acfs);
1453 this.codonFrameList = acfs;
1458 * Returns the set of codon frame mappings. Any changes to the returned set
1459 * will affect the alignment. The mappings are held on (and read from) the
1460 * dataset alignment if there is one.
1462 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1465 public List<AlignedCodonFrame> getCodonFrames()
1467 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1468 // this behaviour is currently incorrect. method should return codon frames
1469 // for just the alignment,
1470 // selected from dataset
1471 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1475 * Removes the given mapping from the stored set. Note that the mappings are
1476 * held on the dataset alignment if there is one.
1479 public boolean removeCodonFrame(AlignedCodonFrame codons)
1481 List<AlignedCodonFrame> acfs = getCodonFrames();
1482 if (codons == null || acfs == null)
1486 return acfs.remove(codons);
1490 public void append(AlignmentI toappend)
1492 // TODO JAL-1270 needs test coverage
1493 // currently tested for use in jalview.gui.SequenceFetcher
1494 char oldc = toappend.getGapCharacter();
1495 boolean samegap = oldc == getGapCharacter();
1496 boolean hashidden = toappend.getHiddenSequences() != null
1497 && toappend.getHiddenSequences().hiddenSequences != null;
1498 // get all sequences including any hidden ones
1499 List<SequenceI> sqs = (hashidden)
1500 ? toappend.getHiddenSequences().getFullAlignment()
1502 : toappend.getSequences();
1505 // avoid self append deadlock by
1506 List<SequenceI> toappendsq = new ArrayList<>();
1509 for (SequenceI addedsq : sqs)
1513 addedsq.replace(oldc, gapCharacter);
1515 toappendsq.add(addedsq);
1518 for (SequenceI addedsq : toappendsq)
1520 addSequence(addedsq);
1523 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1524 for (int a = 0; alan != null && a < alan.length; a++)
1526 addAnnotation(alan[a]);
1530 getCodonFrames().addAll(toappend.getCodonFrames());
1532 List<SequenceGroup> sg = toappend.getGroups();
1535 for (SequenceGroup _sg : sg)
1540 if (toappend.getHiddenSequences() != null)
1542 HiddenSequences hs = toappend.getHiddenSequences();
1543 if (hiddenSequences == null)
1545 hiddenSequences = new HiddenSequences(this);
1547 if (hs.hiddenSequences != null)
1549 for (int s = 0; s < hs.hiddenSequences.length; s++)
1551 // hide the newly appended sequence in the alignment
1552 if (hs.hiddenSequences[s] != null)
1554 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1559 if (toappend.getProperties() != null)
1561 // we really can't do very much here - just try to concatenate strings
1562 // where property collisions occur.
1563 Enumeration key = toappend.getProperties().keys();
1564 while (key.hasMoreElements())
1566 Object k = key.nextElement();
1567 Object ourval = this.getProperty(k);
1568 Object toapprop = toappend.getProperty(k);
1571 if (ourval.getClass().equals(toapprop.getClass())
1572 && !ourval.equals(toapprop))
1574 if (ourval instanceof String)
1578 ((String) ourval) + "; " + ((String) toapprop));
1582 if (ourval instanceof Vector)
1585 Enumeration theirv = ((Vector) toapprop).elements();
1586 while (theirv.hasMoreElements())
1588 ((Vector) ourval).addElement(theirv);
1596 // just add new property directly
1597 setProperty(k, toapprop);
1605 public AlignmentAnnotation findOrCreateAnnotation(String name,
1606 String calcId, boolean autoCalc, SequenceI seqRef,
1607 SequenceGroup groupRef)
1609 if (annotations != null)
1611 for (AlignmentAnnotation annot : getAlignmentAnnotation())
1613 if (annot.autoCalculated == autoCalc && (name.equals(annot.label))
1614 && (calcId == null || annot.getCalcId().equals(calcId))
1615 && annot.sequenceRef == seqRef
1616 && annot.groupRef == groupRef)
1622 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1623 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1624 annot.hasText = false;
1627 annot.setCalcId(new String(calcId));
1629 annot.autoCalculated = autoCalc;
1632 annot.setSequenceRef(seqRef);
1634 annot.groupRef = groupRef;
1635 addAnnotation(annot);
1641 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1643 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1644 if (alignmentAnnotation != null)
1646 return AlignmentAnnotation.findAnnotation(
1647 Arrays.asList(getAlignmentAnnotation()), calcId);
1649 return Arrays.asList(new AlignmentAnnotation[] {});
1653 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1654 String calcId, String label)
1656 return annotations == null ? null
1657 : AlignmentAnnotation.findAnnotations(
1658 Arrays.asList(getAlignmentAnnotation()), seq, calcId,
1663 public void moveSelectedSequencesByOne(SequenceGroup sg,
1664 Map<SequenceI, SequenceCollectionI> map, boolean up)
1666 synchronized (sequences)
1671 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1673 SequenceI seq = sequences.get(i);
1674 if (!sg.getSequences(map).contains(seq))
1679 SequenceI temp = sequences.get(i - 1);
1680 if (sg.getSequences(null).contains(temp))
1685 sequences.set(i, temp);
1686 sequences.set(i - 1, seq);
1691 for (int i = sequences.size() - 2; i > -1; i--)
1693 SequenceI seq = sequences.get(i);
1694 if (!sg.getSequences(map).contains(seq))
1699 SequenceI temp = sequences.get(i + 1);
1700 if (sg.getSequences(map).contains(temp))
1705 sequences.set(i, temp);
1706 sequences.set(i + 1, seq);
1714 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1716 alignmentAnnotation.validateRangeAndDisplay();
1717 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1719 hasRNAStructure = true;
1723 private SequenceI seqrep = null;
1727 * @return the representative sequence for this group
1730 public SequenceI getSeqrep()
1736 * set the representative sequence for this group. Note - this affects the
1737 * interpretation of the Hidereps attribute.
1740 * the seqrep to set (null means no sequence representative)
1743 public void setSeqrep(SequenceI seqrep)
1745 this.seqrep = seqrep;
1750 * @return true if group has a sequence representative
1753 public boolean hasSeqrep()
1755 return seqrep != null;
1759 public int getEndRes()
1761 return getWidth() - 1;
1765 public int getStartRes()
1771 * In the case of AlignmentI - returns the dataset for the alignment, if set
1774 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
1777 public AnnotatedCollectionI getContext()
1783 * Align this alignment like the given (mapped) one.
1786 public int alignAs(AlignmentI al)
1789 * Currently retains unmapped gaps (in introns), regaps mapped regions
1792 return alignAs(al, false, true);
1796 * Align this alignment 'the same as' the given one. Mapped sequences only are
1797 * realigned. If both of the same type (nucleotide/protein) then align both
1798 * identically. If this is nucleotide and the other is protein, make 3 gaps
1799 * for each gap in the protein sequences. If this is protein and the other is
1800 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1801 * nucleotide bases. If this is protein and the other is nucleotide, gaps
1802 * protein to match the relative ordering of codons in the nucleotide.
1804 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1805 * regions are preserved. Gaps that connect introns to exons are treated
1806 * conservatively, i.e. only preserved if both intron and exon gaps are
1807 * preserved. TODO: check caveats below where the implementation fails
1810 * - must have same dataset, and sequences in al must have equivalent
1811 * dataset sequence and start/end bounds under given mapping
1812 * @param preserveMappedGaps
1813 * if true, gaps within and between mapped codons are preserved
1814 * @param preserveUnmappedGaps
1815 * if true, gaps within and between unmapped codons are preserved
1818 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1819 boolean preserveUnmappedGaps)
1821 // TODO should this method signature be the one in the interface?
1822 // JBPComment - yes - neither flag is used, so should be deleted.
1823 boolean thisIsNucleotide = this.isNucleotide();
1824 boolean thatIsProtein = !al.isNucleotide();
1825 if (!thatIsProtein && !thisIsNucleotide)
1827 return AlignmentUtils.alignProteinAsDna(this, al);
1829 else if (thatIsProtein && thisIsNucleotide)
1831 return AlignmentUtils.alignCdsAsProtein(this, al);
1833 return AlignmentUtils.alignAs(this, al);
1837 * Returns the alignment in Fasta format. Behaviour of this method is not
1838 * guaranteed between versions.
1841 public String toString()
1843 return new FastaFile().print(getSequencesArray(), true);
1847 * Returns the set of distinct sequence names. No ordering is guaranteed.
1850 public Set<String> getSequenceNames()
1852 Set<String> names = new HashSet<>();
1853 for (SequenceI seq : getSequences())
1855 names.add(seq.getName());
1861 public boolean hasValidSequence()
1863 boolean hasValidSeq = false;
1864 for (SequenceI seq : getSequences())
1866 if ((seq.getEnd() - seq.getStart()) > 0)
1876 * Update any mappings to 'virtual' sequences to compatible real ones, if
1877 * present in the added sequences. Returns a count of mappings updated.
1883 public int realiseMappings(List<SequenceI> seqs)
1886 for (SequenceI seq : seqs)
1888 for (AlignedCodonFrame mapping : getCodonFrames())
1890 count += mapping.realiseWith(seq);
1897 * Returns the first AlignedCodonFrame that has a mapping between the given
1905 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1907 for (AlignedCodonFrame acf : getCodonFrames())
1909 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
// Works out whether the supplied hidden columns differ from the current
// hiddenCols field.
// NOTE(review): the statements after the 'changed' computation are not visible
// in this extract; presumably cols is stored and 'changed' returned - confirm
// against the full file.
1918 public boolean setHiddenColumns(HiddenColumns cols)
// null-safe comparison: with cols == null, a change means hiddenCols is
// currently non-null; otherwise a change means cols.equals(hiddenCols) is false
1920 boolean changed = cols == null ? hiddenCols != null
1921 : !cols.equals(hiddenCols);
// Hides the columns given by the insertions of the first sequence in this
// alignment.
// NOTE(review): one line between the first and second visible statements is
// missing from this extract; it may be a statement - confirm against the full
// file.
1927 public void setupJPredAlignment()
// the sequence at index 0 is used as the reference
1929 SequenceI repseq = getSequenceAt(0);
// build a fresh HiddenColumns from the reference sequence's insertions and
// install it on this alignment
1931 HiddenColumns cs = new HiddenColumns();
1932 cs.hideList(repseq.getInsertions());
1933 setHiddenColumns(cs);
// Public entry point: unpacks the alignment view and delegates to the private
// three-argument propagateInsertions overload.
// NOTE(review): the declaration of 'profsqpos' is not visible in this extract
// (short lines appear to have been dropped) - confirm its value against the
// full file.
1937 public HiddenColumns propagateInsertions(SequenceI profileseq,
1938 AlignmentView input)
1942 char gc = getGapCharacter();
// the returned array is unpacked positionally: element [1] is cast to
// HiddenColumns, element [0] to SequenceI[]
1943 Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc);
1944 HiddenColumns nview = (HiddenColumns) alandhidden[1];
// the sequence at index profsqpos of the view's sequences serves as 'origseq'
1945 SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos];
1946 return propagateInsertions(profileseq, origseq, nview);
1952 * sequence in al which corresponds to origseq
1954 * alignment which is to have gaps inserted into it
1956 * sequence corresponding to profileseq which defines gap map for
// For each region yielded by hc.iterator(), shifts the region's two
// coordinates down by a running gap count, hides the shifted range in a new
// HiddenColumns, and pads the same range via padGaps (which receives
// profileseq as the sequence to leave alone).
// NOTE(review): several lines are missing from this extract - at least one
// after the gapBitset() call, the body of the inner 'while'/'if', and the
// method's return statement. Comments below describe only what is visible.
1959 private HiddenColumns propagateInsertions(SequenceI profileseq,
1960 SequenceI origseq, HiddenColumns hc)
1962 // take the set of hidden columns, and the set of gaps in origseq,
1963 // and remove all the hidden gaps from hiddenColumns
1965 // first get the gaps as a Bitset
1966 // then calculate hidden ^ not(gap)
1967 BitSet gaps = origseq.gapBitset();
// NOTE(review): the step that applies 'gaps' to 'hc' (described by the
// comments above) is not visible here - presumably on a dropped line
1970 // for each sequence in the alignment, except the profile sequence,
1971 // insert gaps corresponding to each hidden region but where each hidden
1972 // column region is shifted backwards by the number of preceding visible
1973 // gaps update hidden columns at the same time
1974 HiddenColumns newhidden = new HiddenColumns();
// both counters are declared outside the region loop, so they carry over from
// one region to the next
1976 int numGapsBefore = 0;
1977 int gapPosition = 0;
1978 Iterator<int[]> it = hc.iterator();
1979 while (it.hasNext())
1981 int[] region = it.next();
1983 // get region coordinates accounting for gaps
1984 // we can rely on gaps not being *in* hidden regions because we already
// (the continuation of the comment above is not visible in this extract)
// examine positions from gapPosition up to, but not including, region[0]
1986 while (gapPosition < region[0])
// presumably numGapsBefore is incremented when the bit is set and gapPosition
// is advanced each pass - the loop body is not visible; TODO confirm
1989 if (gaps.get(gapPosition))
// both coordinates of the region are shifted by the same gap count
1995 int left = region[0] - numGapsBefore;
1996 int right = region[1] - numGapsBefore;
1998 newhidden.hideColumns(left, right);
1999 padGaps(left, right, profileseq);
2005 * Pad gaps in all sequences in alignment except profileseq
2008 * position of first gap to insert
2010 * position of last gap to insert
2012 * sequence not to pad
// Builds a run of (right - left + 1) characters and, for every sequence other
// than profileseq that is at least 'left' long, forms a new string with that
// run inserted at offset 'left'.
// NOTE(review): the body of the gap-building loop and the head of the call
// that receives the new string are not visible in this extract; presumably the
// gap character is appended and the string is set back on the sequence -
// confirm against the full file.
2014 private void padGaps(int left, int right, SequenceI profileseq)
2016 char gc = getGapCharacter();
2018 // make a string with number of gaps = length of hidden region
2019 StringBuilder sb = new StringBuilder();
// left and right are both inclusive, hence the + 1
2020 for (int g = 0; g < right - left + 1; g++)
2025 // loop over the sequences and pad with gaps where required
// the height is read once, before the loop starts
2026 for (int s = 0, ns = getHeight(); s < ns; s++)
2028 SequenceI sqobj = getSequenceAt(s);
// profileseq is excluded by identity (!=); sequences shorter than 'left' are
// skipped, which keeps substring(0, left) within bounds
2029 if ((sqobj != profileseq) && (sqobj.getLength() >= left))
2031 String sq = sqobj.getSequenceAsString();
// original text up to 'left', then the gap run, then the rest of the sequence
2033 sq.substring(0, left) + sb.toString() + sq.substring(left));
2039 //// Contact Matrix Holder Boilerplate
// Holder that the contact map / contact list methods of this class delegate
// to. Declared without an access modifier and initialised inline.
2041 ContactMapHolder cmholder = new ContactMapHolder();
2044 public Collection<ContactMatrixI> getContactMaps()
2046 return cmholder.getContactMaps();
// Looks up the contact matrix for an annotation, trying in turn: this
// alignment's holder, the annotation's group (if any), the annotation's
// sequence (if any), and finally that sequence's dataset sequence.
// NOTE(review): two lines before the dataset-sequence fallback and the final
// return are not visible in this extract, so the exact condition guarding the
// fallback cannot be confirmed here.
2050 public ContactMatrixI getContactMatrixFor(AlignmentAnnotation _aa)
2052 ContactMatrixI cm = cmholder.getContactMatrixFor(_aa);
// fall back to the group only when the holder had nothing
2053 if (cm == null && _aa.groupRef != null)
2055 cm = _aa.groupRef.getContactMatrixFor(_aa);
// then to the associated sequence
2057 if (cm == null && _aa.sequenceRef != null)
2059 cm = _aa.sequenceRef.getContactMatrixFor(_aa);
2062 // TODO fix up this logic and unify with getContactListFor
// NOTE(review): no null check on getDatasetSequence() is visible here, whereas
// getContactListFor tests it for null before use - possible
// NullPointerException; confirm against the full file
2063 cm = _aa.sequenceRef.getDatasetSequence().getContactMatrixFor(_aa);
2070 public ContactListI getContactListFor(AlignmentAnnotation _aa, int column)
2072 ContactListI cl = cmholder.getContactListFor(_aa, column);
2073 if (cl == null && _aa.groupRef != null)
2075 cl = _aa.groupRef.getContactListFor(_aa, column);
2077 if (cl == null && _aa.sequenceRef != null)
2079 if (_aa.annotations[column] != null)
2081 // sequence associated
2082 cl = _aa.sequenceRef.getContactListFor(_aa, column);
2083 if (cl == null && _aa.sequenceRef.getDatasetSequence() != null)
2085 int spos = _aa.sequenceRef.findPosition(column);
2086 if (spos >= _aa.sequenceRef.getStart()
2087 && spos <= 1 + _aa.sequenceRef.getEnd())
2089 cl = _aa.sequenceRef.getDatasetSequence().getContactListFor(_aa,
2090 spos - _aa.sequenceRef.getStart());
// Registers a contact matrix with the holder and gives the resulting
// annotation an Annotation array as wide as this alignment.
// NOTE(review): the lines after 'aa.annotations = _aa' (including the return
// statement) are not visible in this extract - confirm against the full file
// whether further steps follow.
2099 public AlignmentAnnotation addContactList(ContactMatrixI cm)
2101 AlignmentAnnotation aa = cmholder.addContactList(cm);
// one slot per alignment column
2103 Annotation _aa[] = new Annotation[getWidth()];
// the loop's update clause does all the work: it stores a new Annotation(0.0f)
// in slot i and then increments i, so every slot is filled
2104 for (int i = 0; i < _aa.length; _aa[i++] = new Annotation(0.0f))
2108 aa.annotations = _aa;
2114 public void addContactListFor(AlignmentAnnotation annotation,
2117 cmholder.addContactListFor(annotation, cm);