JAL-4366 fixed wrinkles in reconstructing peptide alignment using 3di alignment....
[jalview.git] / test / jalview / datamodel / AlignmentTest.java
index 1c27420..30c0de4 100644 (file)
@@ -38,9 +38,12 @@ import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
 import jalview.analysis.AlignmentGenerator;
+import jalview.analysis.AlignmentUtils;
+import jalview.analysis.CrossRef;
 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.gui.JvOptionPane;
 import jalview.io.DataSourceType;
+import jalview.io.FastaFile;
 import jalview.io.FileFormat;
 import jalview.io.FileFormatI;
 import jalview.io.FormatAdapter;
@@ -94,6 +97,28 @@ public class AlignmentTest
           "GCTCGUCGTACT\n" +
           ">Seq2Name/60-71\n" +
           "GGGTCAGGCAGT\n";
+  
+  private static final String AA_SEQS_2 = 
+          ">Seq1Name/5-8\n" +
+          "K-QY-L\n" +
+          ">Seq2Name/12-15\n" +
+          "-R-FPW\n";
+  private static final String AA_SEQS_2_DS = 
+          ">Seq1Name/5-8\n" +
+          "KQYL\n" +
+          ">Seq2Name/12-15\n" +
+          "RFPW\n";
+  private static final String TD_SEQS_2_DS = 
+          ">Seq1Name/5-8\n" +
+          "NMPR\n" +
+          ">Seq2Name/12-15\n" +
+          "VXYA\n";
+  private static final String TD_SEQS_2 = 
+          ">Seq1Name/5-8\n" +
+          "-NMP-R\n" +
+          ">Seq2Name/12-15\n" +
+          "VX--YA\n";
+
   // @formatter:on
 
   private AlignmentI al;
@@ -775,6 +800,59 @@ public class AlignmentTest
     assertEquals("-R-F-P-W", al2.getSequenceAt(1).getSequenceAsString());
   }
 
+  
+  /**
+   * Recover the protein MSA from a 3di (tdi) MSA by aligning the protein sequences as the 3di alignment
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testAlignAs_prot_tdi() throws Exception
+  {
+    // see also AlignmentUtilsTests
+    AlignmentI al1 = loadAlignment(TD_SEQS_2, FileFormat.Fasta);
+    AlignmentI al2 = loadAlignment(AA_SEQS_2_DS, FileFormat.Fasta);
+    al1.setDataset(null);
+    al2.setDataset(al1.getDataset());
+    AlignmentI al1copy = new Alignment(al1);
+    AlignmentI al2copy = new Alignment(al2);
+    AlignmentUtils.map3diPeptideToProteinAligment(al2, al1);
+    if (al2.getCodonFrames().isEmpty()) {al2.getCodonFrames().addAll(al1.getCodonFrames()); }
+    else {al1.getCodonFrames().addAll(al2.getCodonFrames()); };
+    
+    ((Alignment) al2).alignAs(al1);
+    assertEquals("-NMP-R", al1.getSequenceAt(0).getSequenceAsString());
+    assertEquals("VX--YA", al1.getSequenceAt(1).getSequenceAsString());
+    assertEquals("-KQY-L", al2.getSequenceAt(0).getSequenceAsString());
+    assertEquals("RF--PW", al2.getSequenceAt(1).getSequenceAsString());
+    
+  }
+  /**
+   * Recover the 3di (tdi) MSA from a protein MSA by aligning the 3di sequences as the protein alignment
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testAlignAs_tdi_prot() throws Exception
+  {
+    // see also AlignmentUtilsTests
+    AlignmentI al1 = loadAlignment(AA_SEQS_2, FileFormat.Fasta);
+    AlignmentI al2 = loadAlignment(TD_SEQS_2_DS, FileFormat.Fasta);
+    al1.setDataset(null);
+    al2.setDataset(al1.getDataset());
+    AlignmentI al1copy = new Alignment(al1);
+    AlignmentI al2copy = new Alignment(al2);
+    AlignmentUtils.map3diPeptideToProteinAligment(al1, al2);
+    if (al2.getCodonFrames().isEmpty()) {al2.getCodonFrames().addAll(al1.getCodonFrames()); }
+    else {al1.getCodonFrames().addAll(al2.getCodonFrames()); };
+    
+    ((Alignment) al2).alignAs(al1);
+    assertEquals("K-QY-L", al1.getSequenceAt(0).getSequenceAsString());
+    assertEquals("-R-FPW", al1.getSequenceAt(1).getSequenceAsString());
+    assertEquals("N-MP-R", al2.getSequenceAt(0).getSequenceAsString());
+    assertEquals("-V-XYA", al2.getSequenceAt(1).getSequenceAsString());
+    
+  }
   /**
    * Test aligning cdna as per protein alignment.
    * 
@@ -829,6 +907,59 @@ public class AlignmentTest
   }
 
   /**
+   * Tests mapping between a protein alignment and a 3di sequence alignment. Assumes a 1:1 mapping.
+   * @throws IOException
+   */
+  @Test(groups={"Functional"},enabled=true)
+  public void testAlignAs_3di() throws IOException
+  {
+    String protAl = ">1ji5_A\n"
+            + "-----------------------------DQPVLLLLLLQLLLLLVLLLQQLVVCLVQAD\n"
+            + "DPCNVVSNVVSVVSSVVSVVSNVVSQVVCVVVVHHHDDDVSSVVRYPQDHHDPP--DYPL\n"
+            + "RSLVSLLVSLVVVLVSLVVSLVSCVVVVNVVSNVSSVVVSVVSVVSNVVSCVVVVD----\n"
+            + "---------------------------------------------------\n"
+            + ">1jig_A\n"
+            + "---------------------------DALLVVLLLLLLQLLLALVLLLQQLVLCLVLAD\n"
+            + "DPCNVVSNVVSVVVSVVSVVSNVVSQVVCVVSVHHHDDDVSSVVRYPQDHDDSP--DYPL\n"
+            + "RSLVSLLVSLVVLLVSLVVSLVSCVVNVNPVSNVSSVVSSVVSVVSNVVSVVVND-----\n"
+            + "---------------------------------------------------\n"
+            + "\n";
+    String tdiAl = ">1ji5_A\n"
+            + "-----------------------------MNKQVIEVLNKQVADWSVLFTKLHNFHWYVK\n"
+            + "GPQFFTLHEKFEELYTESATHIDEIAERILAIGGKPVATKEYLEISSIQEAAYG--ETAE\n"
+            + "GMVEAIMKDYEMMLVELKKGMEIAQNSDDEMTSDLLLGIYTELEKHAWMLRAFLNQ----\n"
+            + "---------------------------------------------------\n"
+            + ">1jig_A\n"
+            + "---------------------------MSTKTNVVEVLNKQVANWNVLYVKLHNYHWYVT\n"
+            + "GPHFFTLHEKFEEFYNEAGTYIDELAERILALEGKPLATKEYLATSSVNEGTSK--ESAE\n"
+            + "EMVQTLVNDYSALIQELKEGMEVAGEAGDATSADMLLAIHTTLEQHVWMLSAFLK-----\n"
+            + "---------------------------------------------------\n" + "";
+    AlignmentI prot = loadAlignment(protAl, FileFormat.Fasta);
+    ((Alignment) prot).createDatasetAlignment();
+
+    AlignmentI tdi = loadAlignment(tdiAl, FileFormat.Fasta);
+    assertTrue(AlignmentUtils.map3diPeptideToProteinAligment(prot, tdi));
+
+    AlignmentI newProt = new Alignment(
+            new SequenceI[]
+            { prot.getSequenceAt(0).getSubSequence(25, 35),
+                prot.getSequenceAt(1).getSubSequence(35, 45) });
+    newProt.setDataset(prot.getDataset());
+
+    // TODO Find matching tdi sequence and construct alignment mirroring
+    // the protein alignment
+    // Alignment newTdi = new CrossRef(newProt.getSequencesArray(),
+    // newProt.getDataset()).findXrefSequences("", false);
+    //
+    // newTdi.alignAs(newProt);
+    //
+    // System.out.println("newProt - aa\n"+new
+    // FastaFile().print(newProt.getSequencesArray(), true));
+    // System.out.println("newProt - 3di\n"+new
+    // FastaFile().print(newTdi.getSequencesArray(), true));
+
+  }
+  /**
    * Helper method that makes mappings and then aligns the first alignment as
    * the second
    * 
@@ -857,7 +988,7 @@ public class AlignmentTest
 
   /**
    * Helper method to make mappings between sequences, and add the mappings to
-   * the 'mapped from' alignment
+   * the 'mapped from' alignment. If {@code alFrom.isNucleotide() == alTo.isNucleotide()} then the ratio is always 1:1
    * 
    * @param alFrom
    * @param alTo