package jalview.datamodel;
import jalview.analysis.AlignmentUtils;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.io.FastaFile;
import jalview.util.Comparison;
import jalview.util.MessageManager;
}
/**
+ * add dataset sequences to seq for currentSeq and any sequences it references
+ */
+ private void resolveAndAddDatasetSeq(SequenceI currentSeq,
+ Set<SequenceI> seqs, boolean createDatasetSequence)
+ {
+ if (currentSeq.getDatasetSequence() != null)
+ {
+ currentSeq = currentSeq.getDatasetSequence();
+ }
+ else
+ {
+ if (createDatasetSequence)
+ {
+ currentSeq = currentSeq.createDatasetSequence();
+ }
+ }
+ if (seqs.contains(currentSeq))
+ {
+ return;
+ }
+ List<SequenceI> toProcess = new ArrayList<SequenceI>();
+ toProcess.add(currentSeq);
+ while (toProcess.size() > 0)
+ {
+ // use a queue ?
+ SequenceI curDs = toProcess.remove(0);
+ if (seqs.contains(curDs))
+ {
+ continue;
+ }
+ seqs.add(curDs);
+ // iterate over database references, making sure we add forward referenced
+ // sequences
+ if (curDs.getDBRefs() != null)
+ {
+ for (DBRefEntry dbr : curDs.getDBRefs())
+ {
+ if (dbr.getMap() != null && dbr.getMap().getTo() != null)
+ {
+ if (dbr.getMap().getTo().getDatasetSequence() != null)
+ {
+ throw new Error("Implementation error: Map.getTo() for dbref"
+ + dbr + " is not a dataset sequence.");
+ // TODO: if this happens, could also rewrite the reference to
+ // point to new dataset sequence
+ }
+ // we recurse to add all forward references to dataset sequences via
+ // DBRefs/etc
+ toProcess.add(dbr.getMap().getTo());
+ }
+ }
+ }
+ }
+ }
+
+ /**
* Creates a new dataset for this alignment. Can only be done once - if
* dataset is not null this will not be performed.
*/
{
return;
}
- SequenceI[] seqs = new SequenceI[getHeight()];
- SequenceI currentSeq;
+ // try to avoid using equals at this stage, it will be expensive
+ Set<SequenceI> seqs = new jalview.util.LinkedIdentityHashSet<SequenceI>();
+
for (int i = 0; i < getHeight(); i++)
{
- currentSeq = getSequenceAt(i);
- if (currentSeq.getDatasetSequence() != null)
- {
- seqs[i] = currentSeq.getDatasetSequence();
- }
- else
+ SequenceI currentSeq = getSequenceAt(i);
+ resolveAndAddDatasetSeq(currentSeq, seqs, true);
+ }
+
+ // verify all mappings are in dataset
+ for (AlignedCodonFrame cf : codonFrameList)
+ {
+ for (SequenceToSequenceMapping ssm : cf.getMappings())
{
- seqs[i] = currentSeq.createDatasetSequence();
+ if (!seqs.contains(ssm.getFromSeq()))
+ {
+ resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
+ }
+ if (!seqs.contains(ssm.getMapping().getTo()))
+ {
+ resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
+ }
}
}
-
- dataset = new Alignment(seqs);
+ // finally construct dataset
+ dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
// move mappings to the dataset alignment
dataset.codonFrameList = this.codonFrameList;
this.codonFrameList = null;
assertTrue(ds.getCodonFrames().contains(acf));
}
+ /**
+ * tests the addition of *all* sequences referred to by a sequence being added
+ * to the dataset
+ */
+ @Test(groups = "Functional")
+ public void testCreateDatasetAlignmentWithMappedToSeqs()
+ {
+ // Alignment with two sequences, gapped.
+ SequenceI sq1 = new Sequence("sq1", "A--SDF");
+ SequenceI sq2 = new Sequence("sq2", "G--TRQ");
+
+ // cross-references to two more sequences.
+ DBRefEntry dbr = new DBRefEntry("SQ1", "", "sq3");
+ SequenceI sq3 = new Sequence("sq3", "VWANG");
+ dbr.setMap(new Mapping(sq3, new MapList(new int[] { 1, 4 }, new int[] {
+ 2, 5 }, 1, 1)));
+ sq1.addDBRef(dbr);
+
+ SequenceI sq4 = new Sequence("sq4", "ERKWI");
+ DBRefEntry dbr2 = new DBRefEntry("SQ2", "", "sq4");
+ dbr2.setMap(new Mapping(sq4, new MapList(new int[] { 1, 4 }, new int[] {
+ 2, 5 }, 1, 1)));
+ sq2.addDBRef(dbr2);
+ // and a 1:1 codonframe mapping between them.
+ AlignedCodonFrame alc = new AlignedCodonFrame();
+ alc.addMap(sq1, sq2, new MapList(new int[] { 1, 4 },
+ new int[] { 1, 4 }, 1, 1));
+
+ AlignmentI protein = new Alignment(new SequenceI[] { sq1, sq2 });
+
+ /*
+ * create the alignment dataset
+ * note this creates sequence datasets where missing
+ * as a side-effect (in this case, on seq2
+ */
+
+ // TODO promote this method to AlignmentI
+ ((Alignment) protein).createDatasetAlignment();
+
+ AlignmentI ds = protein.getDataset();
+
+ // should be 4 sequences in dataset - two materialised, and two propagated
+ // from dbref
+ assertEquals(4, ds.getHeight());
+ assertTrue(ds.getSequences().contains(sq1.getDatasetSequence()));
+ assertTrue(ds.getSequences().contains(sq2.getDatasetSequence()));
+ assertTrue(ds.getSequences().contains(sq3));
+ assertTrue(ds.getSequences().contains(sq4));
+ // Should have one codon frame mapping between sq1 and sq2 via dataset
+ // sequences
+ assertEquals(ds.getCodonFrame(sq1.getDatasetSequence()),
+ ds.getCodonFrame(sq2.getDatasetSequence()));
+ }
+
@Test(groups = "Functional")
public void testAddCodonFrame()
{