2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignmentUtils;
24 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
25 import jalview.io.FastaFile;
26 import jalview.util.Comparison;
27 import jalview.util.LinkedIdentityHashSet;
28 import jalview.util.MessageManager;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.BitSet;
33 import java.util.Collections;
34 import java.util.Enumeration;
35 import java.util.HashSet;
36 import java.util.Hashtable;
37 import java.util.Iterator;
38 import java.util.List;
41 import java.util.Vector;
44 * Data structure to hold and manipulate a multiple sequence alignment
48 public class Alignment implements AlignmentI
50 private static final SequenceGroup[] NO_GROUPS = new SequenceGroup[0];
52 private Alignment dataset;
54 private List<SequenceI> sequences;
56 protected List<SequenceGroup> groups;
58 protected char gapCharacter = '-';
60 private boolean nucleotide = true;
62 private List<AlignedCodonFrame> codonFrameList;
65 * persistent object to hold result of findAllGroups(SequenceI)
67 private List<SequenceGroup> groupsForSequence = new ArrayList<>();
69 public boolean hasRNAStructure = false;
71 public AlignmentAnnotation[] annotations;
73 HiddenSequences hiddenSequences;
75 HiddenColumns hiddenCols;
77 public Hashtable alignmentProperties;
79 private void initAlignment(SequenceI[] seqs)
81 groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
82 hiddenSequences = new HiddenSequences(this);
83 hiddenCols = new HiddenColumns();
84 codonFrameList = new ArrayList<>();
86 nucleotide = Comparison.isNucleotide(seqs);
88 sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
90 for (int i = 0; i < seqs.length; i++)
92 sequences.add(seqs[i]);
98 * Make a 'copy' alignment - sequences have new copies of features and
99 * annotations, but share the original dataset sequences.
101 public Alignment(AlignmentI al)
103 SequenceI[] seqs = al.getSequencesArray();
104 for (int i = 0; i < seqs.length; i++)
106 seqs[i] = new Sequence(seqs[i]);
112 * Share the same dataset sequence mappings (if any).
114 if (dataset == null && al.getDataset() == null)
116 this.setCodonFrames(al.getCodonFrames());
121 * Make an alignment from an array of Sequences.
125 public Alignment(SequenceI[] seqs)
131 * Make a new alignment from an array of SeqCigars
135 public Alignment(SeqCigar[] alseqs)
137 SequenceI[] seqs = SeqCigar.createAlignmentSequences(alseqs,
138 gapCharacter, new HiddenColumns(), null);
143 * Make a new alignment from an CigarArray JBPNote - can only do this when
144 * compactAlignment does not contain hidden regions. JBPNote - must also check
145 * that compactAlignment resolves to a set of SeqCigars - or construct them
148 * @param compactAlignment
151 public static AlignmentI createAlignment(CigarArray compactAlignment)
153 throw new Error(MessageManager
154 .getString("error.alignment_cigararray_not_implemented"));
155 // this(compactAlignment.refCigars);
159 public List<SequenceI> getSequences()
165 public List<SequenceI> getSequences(
166 Map<SequenceI, SequenceCollectionI> hiddenReps)
168 // TODO: in jalview 2.8 we don't do anything with hiddenreps - fix design to
174 public SequenceI[] getSequencesArray()
176 if (sequences == null)
180 synchronized (sequences)
182 return sequences.toArray(new SequenceI[sequences.size()]);
187 * Returns a map of lists of sequences keyed by sequence name.
192 public Map<String, List<SequenceI>> getSequencesByName()
194 return AlignmentUtils.getSequencesByName(this);
198 public SequenceI getSequenceAt(int i)
200 synchronized (sequences)
203 if (i > -1 && i < sequences.size())
205 return sequences.get(i);
213 public SequenceI getSequenceAtAbsoluteIndex(int i)
215 SequenceI seq = null;
216 if (getHiddenSequences().getSize() > 0)
218 seq = getHiddenSequences().getHiddenSequence(i);
221 // didn't find the sequence in the hidden sequences, get it from the
223 int index = getHiddenSequences().findIndexWithoutHiddenSeqs(i);
224 seq = getSequenceAt(index);
229 seq = getSequenceAt(i);
235 * Adds a sequence to the alignment. Recalculates maxLength and size. Note
236 * this currently does not recalculate whether or not the alignment is
237 * nucleotide, so mixed alignments may have undefined behaviour.
242 public void addSequence(SequenceI snew)
247 // maintain dataset integrity
248 SequenceI dsseq = snew.getDatasetSequence();
251 // derive new sequence
252 SequenceI adding = snew.deriveSequence();
254 dsseq = snew.getDatasetSequence();
256 if (getDataset().findIndex(dsseq) == -1)
258 getDataset().addSequence(dsseq);
262 if (sequences == null)
264 initAlignment(new SequenceI[] { snew });
268 synchronized (sequences)
273 if (hiddenSequences != null)
275 hiddenSequences.adjustHeightSequenceAdded();
280 public SequenceI replaceSequenceAt(int i, SequenceI snew)
282 synchronized (sequences)
284 if (sequences.size() > i)
286 return sequences.set(i, snew);
292 hiddenSequences.adjustHeightSequenceAdded();
301 * @return DOCUMENT ME!
304 public List<SequenceGroup> getGroups()
310 public void finalize() throws Throwable
312 if (getDataset() != null)
314 getDataset().removeAlignmentRef();
322 * Defensively nulls out references in case this object is not garbage
325 void nullReferences()
331 hiddenSequences = null;
335 * decrement the alignmentRefs counter by one and null references if it goes
340 private void removeAlignmentRef() throws Throwable
342 if (--alignmentRefs == 0)
349 public void deleteSequence(SequenceI s)
351 synchronized (sequences)
353 deleteSequence(findIndex(s));
358 public void deleteSequence(int i)
360 synchronized (sequences)
362 if (i > -1 && i < getHeight())
365 hiddenSequences.adjustHeightSequenceDeleted(i);
371 public void deleteHiddenSequence(int i)
373 synchronized (sequences)
375 if (i > -1 && i < getHeight())
385 * @see jalview.datamodel.AlignmentI#findGroup(jalview.datamodel.SequenceI)
388 public SequenceGroup findGroup(SequenceI seq, int position)
390 synchronized (groups)
392 for (SequenceGroup sg : groups)
394 if (sg.getSequences(null).contains(seq))
396 if (position >= sg.getStartRes() && position <= sg.getEndRes())
410 * jalview.datamodel.AlignmentI#findAllGroups(jalview.datamodel.SequenceI)
413 public SequenceGroup[] findAllGroups(SequenceI s)
415 synchronized (groups)
417 int gSize = groups.size();
422 groupsForSequence.clear();
423 for (int i = 0; i < gSize; i++)
425 SequenceGroup sg = groups.get(i);
426 if (sg == null || sg.getSequences() == null)
428 this.deleteGroup(sg);
433 if (sg.getSequences().contains(s))
435 groupsForSequence.add(sg);
439 SequenceGroup[] ret = new SequenceGroup[groupsForSequence.size()];
440 return groupsForSequence.toArray(ret);
445 public void addGroup(SequenceGroup sg)
447 synchronized (groups)
449 if (!groups.contains(sg))
451 if (hiddenSequences.getSize() > 0)
453 int i, iSize = sg.getSize();
454 for (i = 0; i < iSize; i++)
456 if (!sequences.contains(sg.getSequenceAt(i)))
458 sg.deleteSequence(sg.getSequenceAt(i), false);
464 if (sg.getSize() < 1)
469 sg.setContext(this, true);
476 * remove any annotation that references gp
479 * (if null, removes all group associated annotation)
481 private void removeAnnotationForGroup(SequenceGroup gp)
483 if (annotations == null || annotations.length == 0)
487 // remove annotation very quickly
488 AlignmentAnnotation[] t,
489 todelete = new AlignmentAnnotation[annotations.length],
490 tokeep = new AlignmentAnnotation[annotations.length];
494 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
496 if (annotations[i].groupRef != null)
498 todelete[p++] = annotations[i];
502 tokeep[k++] = annotations[i];
508 for (i = 0, p = 0, k = 0; i < annotations.length; i++)
510 if (annotations[i].groupRef == gp)
512 todelete[p++] = annotations[i];
516 tokeep[k++] = annotations[i];
522 // clear out the group associated annotation.
523 for (i = 0; i < p; i++)
525 unhookAnnotation(todelete[i]);
528 t = new AlignmentAnnotation[k];
529 for (i = 0; i < k; i++)
538 public void deleteAllGroups()
540 synchronized (groups)
542 if (annotations != null)
544 removeAnnotationForGroup(null);
546 for (SequenceGroup sg : groups)
548 sg.setContext(null, false);
556 public void deleteGroup(SequenceGroup g)
558 synchronized (groups)
560 if (groups.contains(g))
562 removeAnnotationForGroup(g);
564 g.setContext(null, false);
571 public SequenceI findName(String name)
573 return findName(name, false);
579 * @see jalview.datamodel.AlignmentI#findName(java.lang.String, boolean)
582 public SequenceI findName(String token, boolean b)
584 return findName(null, token, b);
590 * @see jalview.datamodel.AlignmentI#findName(SequenceI, java.lang.String,
594 public SequenceI findName(SequenceI startAfter, String token, boolean b)
599 String sqname = null;
600 int nseq = sequences.size();
601 if (startAfter != null)
603 // try to find the sequence in the alignment
604 boolean matched = false;
607 if (getSequenceAt(i++) == startAfter)
620 sq = getSequenceAt(i);
621 sqname = sq.getName();
622 if (sqname.equals(token) // exact match
623 || (b && // allow imperfect matches - case varies
624 (sqname.equalsIgnoreCase(token))))
626 return getSequenceAt(i);
636 public SequenceI[] findSequenceMatch(String name)
638 Vector matches = new Vector();
641 while (i < sequences.size())
643 if (getSequenceAt(i).getName().equals(name))
645 matches.addElement(getSequenceAt(i));
650 SequenceI[] result = new SequenceI[matches.size()];
651 for (i = 0; i < result.length; i++)
653 result[i] = (SequenceI) matches.elementAt(i);
663 * @see jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SequenceI)
666 public int findIndex(SequenceI s)
670 while (i < sequences.size())
672 if (s == getSequenceAt(i))
687 * jalview.datamodel.AlignmentI#findIndex(jalview.datamodel.SearchResults)
690 public int findIndex(SearchResultsI results)
694 while (i < sequences.size())
696 if (results.involvesSequence(getSequenceAt(i)))
706 public int getHeight()
708 return sequences.size();
712 public int getAbsoluteHeight()
714 return sequences.size() + getHiddenSequences().getSize();
718 public int getWidth()
722 for (int i = 0; i < sequences.size(); i++)
724 maxLength = Math.max(maxLength, getSequenceAt(i).getLength());
730 public int getVisibleWidth()
733 if (hiddenCols != null)
735 w -= hiddenCols.getSize();
747 public void setGapCharacter(char gc)
750 synchronized (sequences)
752 for (SequenceI seq : sequences)
754 seq.setSequence(seq.getSequenceAsString().replace('.', gc)
755 .replace('-', gc).replace(' ', gc));
763 * @return DOCUMENT ME!
766 public char getGapCharacter()
774 * @see jalview.datamodel.AlignmentI#isAligned()
777 public boolean isAligned()
779 return isAligned(false);
785 * @see jalview.datamodel.AlignmentI#isAligned(boolean)
788 public boolean isAligned(boolean includeHidden)
790 int width = getWidth();
791 if (hiddenSequences == null || hiddenSequences.getSize() == 0)
793 includeHidden = true; // no hidden sequences to check against.
795 for (int i = 0; i < sequences.size(); i++)
797 if (includeHidden || !hiddenSequences.isHidden(getSequenceAt(i)))
799 if (getSequenceAt(i).getLength() != width)
810 public boolean isHidden(int alignmentIndex)
812 return (getHiddenSequences().getHiddenSequence(alignmentIndex) != null);
816 * Delete all annotations, including auto-calculated if the flag is set true.
817 * Returns true if at least one annotation was deleted, else false.
819 * @param includingAutoCalculated
823 public boolean deleteAllAnnotations(boolean includingAutoCalculated)
825 boolean result = false;
826 for (AlignmentAnnotation alan : getAlignmentAnnotation())
828 if (!alan.autoCalculated || includingAutoCalculated)
830 deleteAnnotation(alan);
840 * @seejalview.datamodel.AlignmentI#deleteAnnotation(jalview.datamodel.
841 * AlignmentAnnotation)
844 public boolean deleteAnnotation(AlignmentAnnotation aa)
846 return deleteAnnotation(aa, true);
850 public boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook)
854 if (annotations != null)
856 aSize = annotations.length;
864 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
866 boolean swap = false;
869 for (int i = 0; i < aSize; i++)
871 if (annotations[i] == aa)
876 if (tIndex < temp.length)
878 temp[tIndex++] = annotations[i];
887 unhookAnnotation(aa);
894 * remove any object references associated with this annotation
898 private void unhookAnnotation(AlignmentAnnotation aa)
900 if (aa.sequenceRef != null)
902 aa.sequenceRef.removeAlignmentAnnotation(aa);
904 if (aa.groupRef != null)
906 // probably need to do more here in the future (post 2.5.0)
914 * @seejalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
915 * AlignmentAnnotation)
918 public void addAnnotation(AlignmentAnnotation aa)
920 addAnnotation(aa, -1);
926 * @see jalview.datamodel.AlignmentI#addAnnotation(jalview.datamodel.
927 * AlignmentAnnotation, int)
930 public void addAnnotation(AlignmentAnnotation aa, int pos)
932 if (aa.getRNAStruc() != null)
934 hasRNAStructure = true;
938 if (annotations != null)
940 aSize = annotations.length + 1;
943 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
945 if (pos == -1 || pos >= aSize)
947 temp[aSize - 1] = aa;
956 for (i = 0; i < (aSize - 1); i++, p++)
964 temp[p] = annotations[i];
973 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
975 if (aa == null || annotations == null || annotations.length - 1 < index)
980 int aSize = annotations.length;
981 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
985 for (int i = 0; i < aSize; i++)
994 temp[i] = annotations[i];
998 temp[i] = annotations[i - 1];
1007 * returns all annotation on the alignment
1009 public AlignmentAnnotation[] getAlignmentAnnotation()
1015 public boolean isNucleotide()
1021 public boolean hasRNAStructure()
1023 // TODO can it happen that structure is removed from alignment?
1024 return hasRNAStructure;
1028 public void setDataset(AlignmentI data)
1030 if (dataset == null && data == null)
1032 createDatasetAlignment();
1034 else if (dataset == null && data != null)
1038 throw new IllegalArgumentException("Circular dataset reference");
1040 if (!(data instanceof Alignment))
1043 "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference");
1045 dataset = (Alignment) data;
1046 for (int i = 0; i < getHeight(); i++)
1048 SequenceI currentSeq = getSequenceAt(i);
1049 SequenceI dsq = currentSeq.getDatasetSequence();
1052 dsq = currentSeq.createDatasetSequence();
1053 dataset.addSequence(dsq);
1057 while (dsq.getDatasetSequence() != null)
1059 dsq = dsq.getDatasetSequence();
1061 if (dataset.findIndex(dsq) == -1)
1063 dataset.addSequence(dsq);
1068 dataset.addAlignmentRef();
1072 * add dataset sequences to seq for currentSeq and any sequences it references
1074 private void resolveAndAddDatasetSeq(SequenceI currentSeq,
1075 Set<SequenceI> seqs, boolean createDatasetSequence)
1077 SequenceI alignedSeq = currentSeq;
1078 if (currentSeq.getDatasetSequence() != null)
1080 currentSeq = currentSeq.getDatasetSequence();
1084 if (createDatasetSequence)
1086 currentSeq = currentSeq.createDatasetSequence();
1090 List<SequenceI> toProcess = new ArrayList<>();
1091 toProcess.add(currentSeq);
1092 while (toProcess.size() > 0)
1095 SequenceI curDs = toProcess.remove(0);
1097 if (!seqs.add(curDs))
1101 // iterate over database references, making sure we add forward referenced
1103 if (curDs.getDBRefs() != null)
1105 for (DBRefEntry dbr : curDs.getDBRefs())
1107 if (dbr.getMap() != null && dbr.getMap().getTo() != null)
1109 if (dbr.getMap().getTo() == alignedSeq)
1112 * update mapping to be to the newly created dataset sequence
1114 dbr.getMap().setTo(currentSeq);
1116 if (dbr.getMap().getTo().getDatasetSequence() != null)
1118 throw new Error("Implementation error: Map.getTo() for dbref "
1119 + dbr + " from " + curDs.getName()
1120 + " is not a dataset sequence.");
1122 // we recurse to add all forward references to dataset sequences via
1124 toProcess.add(dbr.getMap().getTo());
1132 * Creates a new dataset for this alignment. Can only be done once - if
1133 * dataset is not null this will not be performed.
1135 public void createDatasetAlignment()
1137 if (dataset != null)
1141 // try to avoid using SequenceI.equals at this stage, it will be expensive
1142 Set<SequenceI> seqs = new LinkedIdentityHashSet<>();
1144 for (int i = 0; i < getHeight(); i++)
1146 SequenceI currentSeq = getSequenceAt(i);
1147 resolveAndAddDatasetSeq(currentSeq, seqs, true);
1150 // verify all mappings are in dataset
1151 for (AlignedCodonFrame cf : codonFrameList)
1153 for (SequenceToSequenceMapping ssm : cf.getMappings())
1155 if (!seqs.contains(ssm.getFromSeq()))
1157 resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
1159 if (!seqs.contains(ssm.getMapping().getTo()))
1161 resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
1165 // finally construct dataset
1166 dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
1167 // move mappings to the dataset alignment
1168 dataset.codonFrameList = this.codonFrameList;
1169 this.codonFrameList = null;
1173 * reference count for number of alignments referencing this one.
1175 int alignmentRefs = 0;
1178 * increase reference count to this alignment.
1180 private void addAlignmentRef()
1186 public Alignment getDataset()
1192 public boolean padGaps()
1194 boolean modified = false;
1196 // Remove excess gaps from the end of alignment
1200 int nseq = sequences.size();
1201 for (int i = 0; i < nseq; i++)
1203 current = getSequenceAt(i);
1204 for (int j = current.getLength(); j > maxLength; j--)
1207 && !jalview.util.Comparison.isGap(current.getCharAt(j)))
1218 for (int i = 0; i < nseq; i++)
1220 current = getSequenceAt(i);
1221 cLength = current.getLength();
1223 if (cLength < maxLength)
1225 current.insertCharAt(cLength, maxLength - cLength, gapCharacter);
1228 else if (current.getLength() > maxLength)
1230 current.deleteChars(maxLength, current.getLength());
1237 * Justify the sequences to the left or right by deleting and inserting gaps
1238 * before the initial residue or after the terminal residue
1241 * true if alignment padded to right, false to justify to left
1242 * @return true if alignment was changed
1245 public boolean justify(boolean right)
1247 boolean modified = false;
1249 // Remove excess gaps from the end of alignment
1251 int ends[] = new int[sequences.size() * 2];
1253 for (int i = 0; i < sequences.size(); i++)
1255 current = getSequenceAt(i);
1256 // This should really be a sequence method
1257 ends[i * 2] = current.findIndex(current.getStart());
1258 ends[i * 2 + 1] = current
1259 .findIndex(current.getStart() + current.getLength());
1260 boolean hitres = false;
1261 for (int j = 0, rs = 0, ssiz = current.getLength(); j < ssiz; j++)
1263 if (!jalview.util.Comparison.isGap(current.getCharAt(j)))
1272 ends[i * 2 + 1] = j;
1273 if (j - ends[i * 2] > maxLength)
1275 maxLength = j - ends[i * 2];
1283 // now edit the flanking gaps to justify to either left or right
1284 int cLength, extent, diff;
1285 for (int i = 0; i < sequences.size(); i++)
1287 current = getSequenceAt(i);
1289 cLength = 1 + ends[i * 2 + 1] - ends[i * 2];
1290 diff = maxLength - cLength; // number of gaps to indent
1291 extent = current.getLength();
1295 if (extent > ends[i * 2 + 1])
1297 current.deleteChars(ends[i * 2 + 1] + 1, extent);
1300 if (ends[i * 2] > diff)
1302 current.deleteChars(0, ends[i * 2] - diff);
1307 if (ends[i * 2] < diff)
1309 current.insertCharAt(0, diff - ends[i * 2], gapCharacter);
1317 if (ends[i * 2] > 0)
1319 current.deleteChars(0, ends[i * 2]);
1321 ends[i * 2 + 1] -= ends[i * 2];
1322 extent -= ends[i * 2];
1324 if (extent > maxLength)
1326 current.deleteChars(maxLength + 1, extent);
1331 if (extent < maxLength)
1333 current.insertCharAt(extent, maxLength - extent, gapCharacter);
1343 public HiddenSequences getHiddenSequences()
1345 return hiddenSequences;
1349 public HiddenColumns getHiddenColumns()
1355 public CigarArray getCompactAlignment()
1357 synchronized (sequences)
1359 SeqCigar alseqs[] = new SeqCigar[sequences.size()];
1361 for (SequenceI seq : sequences)
1363 alseqs[i++] = new SeqCigar(seq);
1365 CigarArray cal = new CigarArray(alseqs);
1366 cal.addOperation(CigarArray.M, getWidth());
1372 public void setProperty(Object key, Object value)
1374 if (alignmentProperties == null)
1376 alignmentProperties = new Hashtable();
1379 alignmentProperties.put(key, value);
1383 public Object getProperty(Object key)
1385 if (alignmentProperties != null)
1387 return alignmentProperties.get(key);
1396 public Hashtable getProperties()
1398 return alignmentProperties;
1402 * Adds the given mapping to the stored set. Note this may be held on the
1403 * dataset alignment.
1406 public void addCodonFrame(AlignedCodonFrame codons)
1408 List<AlignedCodonFrame> acfs = getCodonFrames();
1409 if (codons != null && acfs != null && !acfs.contains(codons))
1419 * jalview.datamodel.AlignmentI#getCodonFrame(jalview.datamodel.SequenceI)
1422 public List<AlignedCodonFrame> getCodonFrame(SequenceI seq)
1428 List<AlignedCodonFrame> cframes = new ArrayList<>();
1429 for (AlignedCodonFrame acf : getCodonFrames())
1431 if (acf.involvesSequence(seq))
1440 * Sets the codon frame mappings (replacing any existing mappings). Note the
1441 * mappings are set on the dataset alignment instead if there is one.
1443 * @see jalview.datamodel.AlignmentI#setCodonFrames()
1446 public void setCodonFrames(List<AlignedCodonFrame> acfs)
1448 if (dataset != null)
1450 dataset.setCodonFrames(acfs);
1454 this.codonFrameList = acfs;
1459 * Returns the set of codon frame mappings. Any changes to the returned set
1460 * will affect the alignment. The mappings are held on (and read from) the
1461 * dataset alignment if there is one.
1463 * @see jalview.datamodel.AlignmentI#getCodonFrames()
1466 public List<AlignedCodonFrame> getCodonFrames()
1468 // TODO: Fix this method to fix failing AlignedCodonFrame tests
1469 // this behaviour is currently incorrect. method should return codon frames
1470 // for just the alignment,
1471 // selected from dataset
1472 return dataset != null ? dataset.getCodonFrames() : codonFrameList;
1476 * Removes the given mapping from the stored set. Note that the mappings are
1477 * held on the dataset alignment if there is one.
1480 public boolean removeCodonFrame(AlignedCodonFrame codons)
1482 List<AlignedCodonFrame> acfs = getCodonFrames();
1483 if (codons == null || acfs == null)
1487 return acfs.remove(codons);
1491 public void append(AlignmentI toappend)
1493 // TODO JAL-1270 needs test coverage
1494 // currently tested for use in jalview.gui.SequenceFetcher
1495 char oldc = toappend.getGapCharacter();
1496 boolean samegap = oldc == getGapCharacter();
1497 boolean hashidden = toappend.getHiddenSequences() != null
1498 && toappend.getHiddenSequences().hiddenSequences != null;
1499 // get all sequences including any hidden ones
1500 List<SequenceI> sqs = (hashidden)
1501 ? toappend.getHiddenSequences().getFullAlignment()
1503 : toappend.getSequences();
1506 // avoid self append deadlock by
1507 List<SequenceI> toappendsq = new ArrayList<>();
1510 for (SequenceI addedsq : sqs)
1514 addedsq.replace(oldc, gapCharacter);
1516 toappendsq.add(addedsq);
1519 for (SequenceI addedsq : toappendsq)
1521 addSequence(addedsq);
1524 AlignmentAnnotation[] alan = toappend.getAlignmentAnnotation();
1525 for (int a = 0; alan != null && a < alan.length; a++)
1527 addAnnotation(alan[a]);
1531 getCodonFrames().addAll(toappend.getCodonFrames());
1533 List<SequenceGroup> sg = toappend.getGroups();
1536 for (SequenceGroup _sg : sg)
1541 if (toappend.getHiddenSequences() != null)
1543 HiddenSequences hs = toappend.getHiddenSequences();
1544 if (hiddenSequences == null)
1546 hiddenSequences = new HiddenSequences(this);
1548 if (hs.hiddenSequences != null)
1550 for (int s = 0; s < hs.hiddenSequences.length; s++)
1552 // hide the newly appended sequence in the alignment
1553 if (hs.hiddenSequences[s] != null)
1555 hiddenSequences.hideSequence(hs.hiddenSequences[s]);
1560 if (toappend.getProperties() != null)
1562 // we really can't do very much here - just try to concatenate strings
1563 // where property collisions occur.
1564 Enumeration key = toappend.getProperties().keys();
1565 while (key.hasMoreElements())
1567 Object k = key.nextElement();
1568 Object ourval = this.getProperty(k);
1569 Object toapprop = toappend.getProperty(k);
1572 if (ourval.getClass().equals(toapprop.getClass())
1573 && !ourval.equals(toapprop))
1575 if (ourval instanceof String)
1579 ((String) ourval) + "; " + ((String) toapprop));
1583 if (ourval instanceof Vector)
1586 Enumeration theirv = ((Vector) toapprop).elements();
1587 while (theirv.hasMoreElements())
1589 ((Vector) ourval).addElement(theirv);
1597 // just add new property directly
1598 setProperty(k, toapprop);
1606 public AlignmentAnnotation findOrCreateAnnotation(String name,
1607 String calcId, boolean autoCalc, SequenceI seqRef,
1608 SequenceGroup groupRef)
1610 if (annotations != null)
1612 for (AlignmentAnnotation annot : getAlignmentAnnotation())
1614 if (annot.autoCalculated == autoCalc && (name.equals(annot.label))
1615 && (calcId == null || annot.getCalcId().equals(calcId))
1616 && annot.sequenceRef == seqRef
1617 && annot.groupRef == groupRef)
1623 AlignmentAnnotation annot = new AlignmentAnnotation(name, name,
1624 new Annotation[1], 0f, 0f, AlignmentAnnotation.BAR_GRAPH);
1625 annot.hasText = false;
1628 annot.setCalcId(new String(calcId));
1630 annot.autoCalculated = autoCalc;
1633 annot.setSequenceRef(seqRef);
1635 annot.groupRef = groupRef;
1636 addAnnotation(annot);
1642 public Iterable<AlignmentAnnotation> findAnnotation(String calcId)
1644 AlignmentAnnotation[] alignmentAnnotation = getAlignmentAnnotation();
1645 if (alignmentAnnotation != null)
1647 return AlignmentAnnotation.findAnnotation(
1648 Arrays.asList(getAlignmentAnnotation()), calcId);
1650 return Arrays.asList(new AlignmentAnnotation[] {});
1654 public Iterable<AlignmentAnnotation> findAnnotations(SequenceI seq,
1655 String calcId, String label)
1657 return AlignmentAnnotation.findAnnotations(
1658 Arrays.asList(getAlignmentAnnotation()), seq, calcId, label);
1662 public void moveSelectedSequencesByOne(SequenceGroup sg,
1663 Map<SequenceI, SequenceCollectionI> map, boolean up)
1665 synchronized (sequences)
1670 for (int i = 1, iSize = sequences.size(); i < iSize; i++)
1672 SequenceI seq = sequences.get(i);
1673 if (!sg.getSequences(map).contains(seq))
1678 SequenceI temp = sequences.get(i - 1);
1679 if (sg.getSequences(null).contains(temp))
1684 sequences.set(i, temp);
1685 sequences.set(i - 1, seq);
1690 for (int i = sequences.size() - 2; i > -1; i--)
1692 SequenceI seq = sequences.get(i);
1693 if (!sg.getSequences(map).contains(seq))
1698 SequenceI temp = sequences.get(i + 1);
1699 if (sg.getSequences(map).contains(temp))
1704 sequences.set(i, temp);
1705 sequences.set(i + 1, seq);
1713 public void validateAnnotation(AlignmentAnnotation alignmentAnnotation)
1715 alignmentAnnotation.validateRangeAndDisplay();
1716 if (isNucleotide() && alignmentAnnotation.isValidStruc())
1718 hasRNAStructure = true;
1722 private SequenceI seqrep = null;
1726 * @return the representative sequence for this group
1729 public SequenceI getSeqrep()
1735 * set the representative sequence for this group. Note - this affects the
1736 * interpretation of the Hidereps attribute.
1739 * the seqrep to set (null means no sequence representative)
1742 public void setSeqrep(SequenceI seqrep)
1744 this.seqrep = seqrep;
1749 * @return true if group has a sequence representative
1752 public boolean hasSeqrep()
1754 return seqrep != null;
1758 public int getEndRes()
1760 return getWidth() - 1;
1764 public int getStartRes()
1770 * In the case of AlignmentI - returns the dataset for the alignment, if set
1773 * @see jalview.datamodel.AnnotatedCollectionI#getContext()
1776 public AnnotatedCollectionI getContext()
1782 * Align this alignment like the given (mapped) one.
1785 public int alignAs(AlignmentI al)
1788 * Currently retains unmapped gaps (in introns), regaps mapped regions
1791 return alignAs(al, false, true);
1795 * Align this alignment 'the same as' the given one. Mapped sequences only are
1796 * realigned. If both of the same type (nucleotide/protein) then align both
1797 * identically. If this is nucleotide and the other is protein, make 3 gaps
1798 * for each gap in the protein sequences. If this is protein and the other is
1799 * nucleotide, insert a gap for each 3 gaps (or part thereof) between
1800 * nucleotide bases. If this is protein and the other is nucleotide, gaps
1801 * protein to match the relative ordering of codons in the nucleotide.
1803 * Parameters control whether gaps in exon (mapped) and intron (unmapped)
1804 * regions are preserved. Gaps that connect introns to exons are treated
1805 * conservatively, i.e. only preserved if both intron and exon gaps are
1806 * preserved. TODO: check caveats below where the implementation fails
1809 * - must have same dataset, and sequences in al must have equivalent
1810 * dataset sequence and start/end bounds under given mapping
1811 * @param preserveMappedGaps
1812 * if true, gaps within and between mapped codons are preserved
1813 * @param preserveUnmappedGaps
1814 * if true, gaps within and between unmapped codons are preserved
1817 public int alignAs(AlignmentI al, boolean preserveMappedGaps,
1818 boolean preserveUnmappedGaps)
1820 // TODO should this method signature be the one in the interface?
1821 // JBPComment - yes - neither flag is used, so should be deleted.
1822 boolean thisIsNucleotide = this.isNucleotide();
1823 boolean thatIsProtein = !al.isNucleotide();
1824 if (!thatIsProtein && !thisIsNucleotide)
1826 return AlignmentUtils.alignProteinAsDna(this, al);
1828 else if (thatIsProtein && thisIsNucleotide)
1830 return AlignmentUtils.alignCdsAsProtein(this, al);
1832 return AlignmentUtils.alignAs(this, al);
1836 * Returns the alignment in Fasta format. Behaviour of this method is not
1837 * guaranteed between versions.
1840 public String toString()
1842 return new FastaFile().print(getSequencesArray(), true);
1846 * Returns the set of distinct sequence names. No ordering is guaranteed.
1849 public Set<String> getSequenceNames()
1851 Set<String> names = new HashSet<>();
1852 for (SequenceI seq : getSequences())
1854 names.add(seq.getName());
1860 public boolean hasValidSequence()
1862 boolean hasValidSeq = false;
1863 for (SequenceI seq : getSequences())
1865 if ((seq.getEnd() - seq.getStart()) > 0)
1875 * Update any mappings to 'virtual' sequences to compatible real ones, if
1876 * present in the added sequences. Returns a count of mappings updated.
1882 public int realiseMappings(List<SequenceI> seqs)
1885 for (SequenceI seq : seqs)
1887 for (AlignedCodonFrame mapping : getCodonFrames())
1889 count += mapping.realiseWith(seq);
1896 * Returns the first AlignedCodonFrame that has a mapping between the given
1904 public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
1906 for (AlignedCodonFrame acf : getCodonFrames())
1908 if (acf.getAaForDnaSeq(mapFrom) == mapTo)
1917 public boolean setHiddenColumns(HiddenColumns cols)
1919 boolean changed = cols == null ? hiddenCols != null
1920 : !cols.equals(hiddenCols);
1926 public void setupJPredAlignment()
1928 SequenceI repseq = getSequenceAt(0);
1930 HiddenColumns cs = new HiddenColumns();
1931 cs.hideList(repseq.getInsertions());
1932 setHiddenColumns(cs);
1936 public HiddenColumns propagateInsertions(SequenceI profileseq,
1937 AlignmentView input)
1941 char gc = getGapCharacter();
1942 Object[] alandhidden = input.getAlignmentAndHiddenColumns(gc);
1943 HiddenColumns nview = (HiddenColumns) alandhidden[1];
1944 SequenceI origseq = ((SequenceI[]) alandhidden[0])[profsqpos];
1945 return propagateInsertions(profileseq, origseq, nview);
1951 * sequence in al which corresponds to origseq
1953 * alignment which is to have gaps inserted into it
1955 * sequence corresponding to profileseq which defines gap map for
1958 private HiddenColumns propagateInsertions(SequenceI profileseq,
1959 SequenceI origseq, HiddenColumns hc)
1961 // take the set of hidden columns, and the set of gaps in origseq,
1962 // and remove all the hidden gaps from hiddenColumns
1964 // first get the gaps as a Bitset
1965 // then calculate hidden ^ not(gap)
1966 BitSet gaps = origseq.gapBitset();
1969 // for each sequence in the alignment, except the profile sequence,
1970 // insert gaps corresponding to each hidden region but where each hidden
1971 // column region is shifted backwards by the number of preceding visible
1972 // gaps update hidden columns at the same time
1973 HiddenColumns newhidden = new HiddenColumns();
1975 int numGapsBefore = 0;
1976 int gapPosition = 0;
1977 Iterator<int[]> it = hc.iterator();
1978 while (it.hasNext())
1980 int[] region = it.next();
1982 // get region coordinates accounting for gaps
1983 // we can rely on gaps not being *in* hidden regions because we already
1985 while (gapPosition < region[0])
1988 if (gaps.get(gapPosition))
1994 int left = region[0] - numGapsBefore;
1995 int right = region[1] - numGapsBefore;
1997 newhidden.hideColumns(left, right);
1998 padGaps(left, right, profileseq);
2004 * Pad gaps in all sequences in alignment except profileseq
2007 * position of first gap to insert
2009 * position of last gap to insert
2011 * sequence not to pad
2013 private void padGaps(int left, int right, SequenceI profileseq)
2015 char gc = getGapCharacter();
2017 // make a string with number of gaps = length of hidden region
2018 StringBuilder sb = new StringBuilder();
2019 for (int g = 0; g < right - left + 1; g++)
2024 // loop over the sequences and pad with gaps where required
2025 for (int s = 0, ns = getHeight(); s < ns; s++)
2027 SequenceI sqobj = getSequenceAt(s);
2028 if ((sqobj != profileseq) && (sqobj.getLength() >= left))
2030 String sq = sqobj.getSequenceAsString();
2032 sq.substring(0, left) + sb.toString() + sq.substring(left));