From 9d465da4f21e403c20b835264929906b538c0c9b Mon Sep 17 00:00:00 2001
From: James Procter
Date: Thu, 11 Jan 2024 14:44:32 +0000
Subject: [PATCH] JAL-4366 allow a 3di alignment to be loaded and shown as a split frame with existing AA alignment

---
 src/jalview/analysis/AlignmentUtils.java       |  135 +++++++++++++++++++++++-
 src/jalview/gui/AlignViewport.java             |   35 ++++--
 test/jalview/analysis/AlignmentUtilsTests.java |   15 +++
 3 files changed, 172 insertions(+), 13 deletions(-)

diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java
index be5133f..af3669f 100644
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -1353,7 +1353,7 @@ public class AlignmentUtils
    */
   public static boolean isMappable(AlignmentI al1, AlignmentI al2)
   {
-    if (al1 == null || al2 == null)
+    if (al1 == null || al2 == null || al1 == al2)
     {
       return false;
     }
@@ -1363,7 +1363,12 @@ public class AlignmentUtils
      */
     if (al1.isNucleotide() == al2.isNucleotide())
     {
-      return false;
+      // or if protein - check if alternate coding
+      if (al1.isNucleotide())
+      {
+        return false;
+      }
+      return check3diPeptideMapping(al1,al2);
     }
     AlignmentI dna = al1.isNucleotide() ? al1 : al2;
     AlignmentI protein = dna == al1 ? al2 : al1;
@@ -1380,6 +1385,132 @@ public class AlignmentUtils
     }
     return false;
   }
+  public static boolean check3diPeptideMapping(AlignmentI al1, AlignmentI al2)
+  {
+    if (al1.getHeight()!=al2.getHeight())
+    { return false;
+    }
+    int mappable=0;
+    for (SequenceI al1seq: al1.getSequences())
+    {
+      boolean foundMappable = false;
+      for (SequenceI al2seq:al2.getSequences())
+      {
+        if (canBuild3diMapping(al1seq,al2seq))
+        {
+          foundMappable = true;
+          break;
+        }
+      }
+      if (foundMappable)
+      {
+        mappable++;
+      }
+    }
+    if (mappable == al1.getHeight())
+    {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Tests for matching id/start-end (ignoring case) and equal ungapped length
+   * @param al1seq a sequence from one alignment
+   * @param al2seq a sequence from the other alignment
+   * @return true if both the display id and the ungapped length match
+   */
+  public static boolean canBuild3diMapping(SequenceI al1seq,
+          SequenceI al2seq)
+  {
+    if (!al1seq.getDisplayId(true)
+            .equalsIgnoreCase(al2seq.getDisplayId(true))) {
+      return false;
+    }
+    String s1 = AlignSeq
+            .extractGaps(jalview.util.Comparison.GapChars,
+                    al1seq.getSequenceAsString());
+    String s2 = AlignSeq.extractGaps(
+            jalview.util.Comparison.GapChars,
+            al2seq.getSequenceAsString());
+    return s1.length()==s2.length();
+  }
+
+  public static boolean map3diPeptideToProteinAligment(
+          AlignmentI proteinAlignment, AlignmentI tdiAlignment)
+  {
+    if (proteinAlignment==null || tdiAlignment==null)
+    {
+      return false;
+    }
+    Set<SequenceI> mappedDna = new HashSet<>();
+    Set<SequenceI> mappedProtein = new HashSet<>();
+
+    /*
+     * Map each protein sequence to any 3di sequence that has the same id and
+     * the same ungapped length (see canBuild3diMapping).
+     */
+    boolean mappingPerformed = mapProteinToTdiAlignment(proteinAlignment,
+            tdiAlignment, mappedDna, mappedProtein);
+    return mappingPerformed;
+
+
+  }
+
+
+  /**
+   * Make mappings between compatible sequences (ids are identical, length of seqs are identical).
+   * 
+   * @param proteinAlignment
+   *          the alignment whose sequences are mapped; new mappings are added to it
+   * @param tdiAlignment
+   *          the 3di alignment whose sequences are mapped to
+   * @param mappedTdiSeq
+   *          a set of mapped 3di sequences (to add to)
+   * @param mappedProtein
+   *          a set of mapped Protein sequences (to add to)
+   * @return true if any mapping already existed or was added
+   */
+  protected static boolean mapProteinToTdiAlignment(
+          final AlignmentI proteinAlignment, final AlignmentI tdiAlignment,
+          Set<SequenceI> mappedTdiSeq, Set<SequenceI> mappedProtein)
+  {
+    boolean mappingExistsOrAdded = false;
+    List<SequenceI> thisSeqs = proteinAlignment.getSequences();
+    for (SequenceI aaSeq : thisSeqs)
+    {
+      boolean proteinMapped = false;
+      AlignedCodonFrame acf = new AlignedCodonFrame();
+
+      for (SequenceI cdnaSeq : tdiAlignment.getSequences())
+      {
+
+        if (mappingExists(proteinAlignment.getCodonFrames(),
+                aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))
+        {
+          mappingExistsOrAdded = true;
+        }
+        else
+        {
+          if (canBuild3diMapping(aaSeq, cdnaSeq))
+          {
+            MapList map = new MapList(new int[] { aaSeq.getStart(),aaSeq.getEnd()},new int[] { cdnaSeq.getStart(),cdnaSeq.getEnd()},1,1);
+            acf.addMap(cdnaSeq, aaSeq, map);
+            mappingExistsOrAdded = true;
+            proteinMapped = true;
+            mappedTdiSeq.add(cdnaSeq);
+            mappedProtein.add(aaSeq);
+          }
+        }
+      }
+      if (proteinMapped)
+      {
+        proteinAlignment.addCodonFrame(acf);
+      }
+    }
+    return mappingExistsOrAdded;
+  }
+
 
   /**
    * Returns true if the dna sequence is mapped, or could be mapped, to the
diff --git a/src/jalview/gui/AlignViewport.java b/src/jalview/gui/AlignViewport.java
index a8bc815..d56764b 100644
--- a/src/jalview/gui/AlignViewport.java
+++ b/src/jalview/gui/AlignViewport.java
@@ -830,18 +830,31 @@ public class AlignViewport extends AlignmentViewport
     AlignmentI thisAlignment = newWindowOrSplitPane
             ? new Alignment(getAlignment())
             : getAlignment();
-    AlignmentI protein = al.isNucleotide() ? thisAlignment : al;
-    final AlignmentI cdna = al.isNucleotide() ? al : thisAlignment;
-
-    /*
-     * Map sequences. At least one should get mapped as we have already passed
-     * the test for 'mappability'. Any mappings made will be added to the
-     * protein alignment. Note creating dataset sequences on the new alignment
-     * is a pre-requisite for building mappings.
-     */
+
+    // always create dataset for imported alignment before doing anything else..
     al.setDataset(null);
-    AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna);
+
+    if (!al.isNucleotide() && !thisAlignment.isNucleotide())
+    {
+      // link AA to 3di or other kind of 'alternative' 1:1 mapping alignment
+      AlignmentUtils.map3diPeptideToProteinAligment(thisAlignment,al);
+    }
+    else
+    {
+      // link CODON triplets to Protein
+      AlignmentI protein = al.isNucleotide() ? thisAlignment : al;
+      final AlignmentI cdna = al.isNucleotide() ? al : thisAlignment;
+
+      /*
+       * Map sequences. At least one should get mapped as we have already passed
+       * the test for 'mappability'. Any mappings made will be added to the
+       * protein alignment. Note creating dataset sequences on the new alignment
+       * is a pre-requisite for building mappings.
+       */
+      AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna);
+    }
 
+
     /*
      * Create the AlignFrame for the added alignment. If it is protein, mappings
      * are registered with StructureSelectionManager as a side-effect.
@@ -876,7 +889,7 @@ public class AlignViewport extends AlignmentViewport
     if (newWindowOrSplitPane)
     {
       al.alignAs(thisAlignment);
-      protein = openSplitFrame(newAlignFrame, thisAlignment);
+      openSplitFrame(newAlignFrame, thisAlignment);
     }
   }
 
diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java
index f017662..044f2ef 100644
--- a/test/jalview/analysis/AlignmentUtilsTests.java
+++ b/test/jalview/analysis/AlignmentUtilsTests.java
@@ -1438,8 +1438,12 @@ public class AlignmentUtilsTests
   {
     SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
     SequenceI aa1 = new Sequence("aa1", "RSG");
+    SequenceI td1 = new Sequence("aa1", "QRV");
+    SequenceI td2 = new Sequence("aa2", "QRV");
     AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
     AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
+    AlignmentI al3 = new Alignment(new SequenceI[] { td1 });
+    AlignmentI al4 = new Alignment(new SequenceI[] { td2 });
 
     assertFalse(AlignmentUtils.isMappable(null, null));
     assertFalse(AlignmentUtils.isMappable(al1, null));
@@ -1447,8 +1451,19 @@ public class AlignmentUtilsTests
     assertFalse(AlignmentUtils.isMappable(al1, al1));
     assertFalse(AlignmentUtils.isMappable(al2, al2));
 
+
     assertTrue(AlignmentUtils.isMappable(al1, al2));
     assertTrue(AlignmentUtils.isMappable(al2, al1));
+
+
+    // test 3di/peptide mappability
+    assertFalse(AlignmentUtils.isMappable(al1, al3));
+    assertFalse(AlignmentUtils.isMappable(al2, al4));
+    assertFalse(AlignmentUtils.isMappable(al3, al4));
+
+
+    assertTrue(AlignmentUtils.isMappable(al2, al3));
+    assertTrue(AlignmentUtils.isMappable(al3, al2));
   }
 
   /**
-- 
1.7.10.2