JAL-4019 Added tests for new Nucleotide sequence detection
author Ben Soares <b.soares@dundee.ac.uk>
Sun, 13 Nov 2022 00:54:09 +0000 (00:54 +0000)
committer Ben Soares <b.soares@dundee.ac.uk>
Sun, 13 Nov 2022 00:54:09 +0000 (00:54 +0000)
test/jalview/analysis/CrossRefTest.java
test/jalview/datamodel/SequenceTest.java
test/jalview/schemes/ResiduePropertiesTest.java
test/jalview/util/ComparisonTest.java
test/jalview/util/comparisonTestProps.jvprops [new file with mode: 0644]

index 2970e3d..e693a7c 100644 (file)
@@ -28,6 +28,16 @@ import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import jalview.bin.Cache;
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.datamodel.Alignment;
@@ -42,15 +52,6 @@ import jalview.util.DBRefUtils;
 import jalview.util.MapList;
 import jalview.ws.SequenceFetcher;
 import jalview.ws.SequenceFetcherFactory;
-import jalview.ws.params.InvalidArgumentException;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
 
 public class CrossRefTest
 {
@@ -62,6 +63,12 @@ public class CrossRefTest
     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
   }
 
+  @BeforeMethod(alwaysRun = true)
+  public void loadProperties()
+  {
+    Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops");
+  }
+
   @Test(groups = { "Functional" })
   public void testFindXDbRefs()
   {
@@ -187,6 +194,7 @@ public class CrossRefTest
     AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
     Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
             .findXrefSequences("UNIPROT", true);
+    System.err.println("xrefs=" + xrefs);
     assertEquals(1, xrefs.getHeight());
     assertSame(uniprotSeq, xrefs.getSequenceAt(0));
   }
index 6e18324..f9c8acf 100644 (file)
@@ -20,8 +20,6 @@
  */
 package jalview.datamodel;
 
-import java.util.Locale;
-
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNotNull;
@@ -30,20 +28,13 @@ import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
-import jalview.analysis.AlignmentGenerator;
-import jalview.commands.EditCommand;
-import jalview.commands.EditCommand.Action;
-import jalview.datamodel.PDBEntry.Type;
-import jalview.gui.JvOptionPane;
-import jalview.util.MapList;
-import jalview.ws.params.InvalidArgumentException;
-
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Vector;
 
 import org.testng.Assert;
@@ -51,6 +42,13 @@ import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import jalview.analysis.AlignmentGenerator;
+import jalview.bin.Cache;
+import jalview.commands.EditCommand;
+import jalview.commands.EditCommand.Action;
+import jalview.datamodel.PDBEntry.Type;
+import jalview.gui.JvOptionPane;
+import jalview.util.MapList;
 import junit.extensions.PA;
 
 public class SequenceTest
@@ -62,6 +60,12 @@ public class SequenceTest
     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
   }
 
+  @BeforeMethod(alwaysRun = true)
+  public void loadProperties()
+  {
+    Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops");
+  }
+
   Sequence seq;
 
   @BeforeMethod(alwaysRun = true)
@@ -123,10 +127,13 @@ public class SequenceTest
     assertTrue(new Sequence("prot", "ASDFASDFASDFXXXXXXXXX").isProtein());
     // test DNA with X
     assertFalse(new Sequence("prot", "ACGTACGTACGTXXXXXXXX").isProtein());
+    // short sequence is nucleotide only if >= NUCLEOTIDE_COUNT_PERCENT (55% in
+    // the test props) is nucleotide and the rest is all N or all X, not a mix
+    assertTrue(new Sequence("prot", "ACGTACGTACGTXN").isProtein());
     // test DNA with N
     assertFalse(new Sequence("prot", "ACGTACGTACGTNNNNNNNN").isProtein());
     // test RNA with X
-    assertFalse(new Sequence("prot", "ACGUACGUACGUXXXXXXXXX").isProtein());
+    assertFalse(new Sequence("prot", "ACGUACGUACGUACTGACAXX").isProtein());
     assertFalse(new Sequence("prot", "ACGUACGUACGUNNNNNNNNN").isProtein());
   }
 
index 180deaf..28f39d8 100644 (file)
@@ -23,8 +23,6 @@ package jalview.schemes;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertNull;
 
-import jalview.gui.JvOptionPane;
-
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -32,6 +30,8 @@ import java.util.Map;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
+import jalview.gui.JvOptionPane;
+
 public class ResiduePropertiesTest
 {
 
@@ -222,7 +222,8 @@ public class ResiduePropertiesTest
      */
     residues = ResidueProperties.getResidues(true, true);
     Collections.sort(residues);
-    assertEquals("[A, C, G, I, N, R, T, U, X, Y]", residues.toString());
+    assertEquals("[A, B, C, D, G, H, I, K, M, N, R, S, T, U, V, W, X, Y]",
+            residues.toString());
   }
 
   @Test(groups = { "Functional" })
index cef9ffc..99c097f 100644 (file)
@@ -24,13 +24,15 @@ import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertTrue;
 
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import jalview.bin.Cache;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.gui.JvOptionPane;
 
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
 public class ComparisonTest
 {
 
@@ -41,6 +43,12 @@ public class ComparisonTest
     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
   }
 
+  @BeforeMethod(alwaysRun = true)
+  public void loadProperties()
+  {
+    Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops");
+  }
+
   @Test(groups = { "Functional" })
   public void testIsGap()
   {
@@ -60,24 +68,41 @@ public class ComparisonTest
   @Test(groups = { "Functional" })
   public void testIsNucleotide_sequences()
   {
-    SequenceI seq = new Sequence("eightypercent", "agctuAGCPV");
+    SequenceI seq = new Sequence("eightypercent+fivepercent", "agctuagcPV");
     assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
     assertFalse(
             Comparison.isNucleotide(new SequenceI[][]
             { new SequenceI[] { seq } }));
 
-    seq = new Sequence("eightyfivepercent", "agctuAGCPVagctuAGCUV");
+    seq = new Sequence("eightyfivepercent+tenpercent",
+            "agctuagcgVagctuagcuVE");
+    assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
+
+    seq = new Sequence(">nineyfivepercent+0percent",
+            "aagctuagcgEagctuagcua");
+    assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
+
+    seq = new Sequence("nineyfivepercent+0percent", "agctuagcgEagctuagcua");
     assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
 
-    seq = new Sequence("nineypercent", "agctuAGCgVagctuAGCUV");
+    seq = new Sequence("nineyfivepercent+fivepercent",
+            "agctuagcgWagctuagcua");
     assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
 
+    seq = new Sequence("nineyfivepercent+tenpercent",
+            "agctuagcgEWWctuagcua");
+    assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
+
+    seq = new Sequence("eightyfivepercent+fifteenpercent",
+            "agctuagcgWWWctuagcua");
+    assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
+
     seq = new Sequence("eightyfivepercentgapped",
             "--agc--tuA--GCPV-a---gct-uA-GC---UV");
     assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
 
-    seq = new Sequence("nineypercentgapped",
-            "ag--ct-u-A---GC---g----Vag--c---tuAGCUV");
+    seq = new Sequence("ninetyfivepercentgapped",
+            "ag--ct-u-a---gc---g----aag--c---tuagcuV");
     assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
 
     seq = new Sequence("allgap", "---------");
@@ -108,8 +133,10 @@ public class ComparisonTest
                 new SequenceI[]
                 { seq, seq, seq, seq, seq2, seq2, null } }));
 
-    seq = new Sequence("ProteinThatLooksLikeDNA", "WYATGCCTGAgtcgt");
-    // 12/14 = 85.7%
+    String seqString = "aaatatatatgEcctgagtcgt";
+    seq = new Sequence("ShortProteinThatLooksLikeDNA", seqString);
+    assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
+    seq = new Sequence("LongProteinThatLooksLikeDNA", seqString.repeat(10));
     assertTrue(Comparison.isNucleotide(new SequenceI[] { seq }));
 
     assertFalse(Comparison.isNucleotide((SequenceI[]) null));
@@ -165,6 +192,25 @@ public class ComparisonTest
     assertFalse(Comparison.isNucleotide('P'));
   }
 
+  @Test(groups = { "Functional" })
+  public void testIsNucleotideAmbiguity()
+  {
+    assertTrue(Comparison.isNucleotide('b', true));
+    assertTrue(Comparison.isNucleotide('B', true));
+    assertTrue(Comparison.isNucleotide('d', true));
+    assertTrue(Comparison.isNucleotide('V', true));
+    assertTrue(Comparison.isNucleotide('M', true));
+    assertTrue(Comparison.isNucleotide('s', true));
+    assertTrue(Comparison.isNucleotide('W', true));
+    assertTrue(Comparison.isNucleotide('x', true));
+    assertTrue(Comparison.isNucleotide('Y', true));
+    assertTrue(Comparison.isNucleotide('r', true));
+    assertTrue(Comparison.isNucleotide('i', true));
+    assertFalse(Comparison.isNucleotide('-', true));
+    assertFalse(Comparison.isNucleotide('n', true));
+    assertFalse(Comparison.isNucleotide('P', true));
+  }
+
   /**
    * Test the percentage identity calculation for two sequences
    */
@@ -206,6 +252,30 @@ public class ComparisonTest
     assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false));
     assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true));
     assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false));
+    assertFalse(Comparison.isNucleotideSequence("gatactawgataca", false));
+    // including nucleotide ambiguity
+    assertTrue(
+            Comparison.isNucleotideSequence("gatacaWgataca", true, true));
+    assertFalse(
+            Comparison.isNucleotideSequence("gatacaEgataca", true, true));
+
+    // not quite all nucleotides and ambiguity codes
+    Sequence seq = new Sequence("Ambiguity DNA codes", "gatacagatacabve");
+    assertFalse(Comparison.isNucleotide(seq));
+    // all nucleotide and nucleotide ambiguity codes
+    seq = new Sequence("Ambiguity DNA codes", "gatacagatacabvt");
+    assertFalse(Comparison.isNucleotide(seq));
+    seq = new Sequence("Ambiguity DNA codes", "agatacabb");
+    assertFalse(Comparison.isNucleotide(seq));
+    // 55% threshold: 6/10 nucleotide passes, 5/10 fails (rest only Xs or Ns)
+    assertTrue(Comparison
+            .isNucleotide(new Sequence("dnaWithXs", "gatacaXXXX")));
+    assertTrue(Comparison
+            .isNucleotide(new Sequence("dnaWithXs", "gatacaNNNN")));
+    assertFalse(Comparison
+            .isNucleotide(new Sequence("dnaWithXs", "gatacXXXXX")));
+    assertFalse(Comparison
+            .isNucleotide(new Sequence("dnaWithXs", "gatacNNNNN")));
   }
 
   @Test(groups = { "Functional" })
@@ -219,4 +289,23 @@ public class ComparisonTest
     assertFalse(Comparison.isSameResidue('a', 'A', true));
     assertFalse(Comparison.isSameResidue('A', 'a', true));
   }
+
+  @Test(groups = { "Functional" })
+  public void testNucleotideProportion()
+  {
+    assertFalse(Comparison.myShortSequenceNucleotideProportionCount(2, 3));
+    assertTrue(Comparison.myShortSequenceNucleotideProportionCount(3, 3));
+    assertFalse(Comparison.myShortSequenceNucleotideProportionCount(2, 4));
+    assertTrue(Comparison.myShortSequenceNucleotideProportionCount(3, 4));
+    assertFalse(
+            Comparison.myShortSequenceNucleotideProportionCount(17, 20));
+    assertTrue(Comparison.myShortSequenceNucleotideProportionCount(18, 20));
+    assertFalse(
+            Comparison.myShortSequenceNucleotideProportionCount(38, 50));
+    assertTrue(Comparison.myShortSequenceNucleotideProportionCount(39, 50));
+    assertFalse(
+            Comparison.myShortSequenceNucleotideProportionCount(54, 100));
+    assertTrue(
+            Comparison.myShortSequenceNucleotideProportionCount(55, 100));
+  }
 }
diff --git a/test/jalview/util/comparisonTestProps.jvprops b/test/jalview/util/comparisonTestProps.jvprops
new file mode 100644 (file)
index 0000000..55f9303
--- /dev/null
@@ -0,0 +1,5 @@
+NUCLEOTIDE_AMBIGUITY_DETECTION=true
+NUCLEOTIDE_COUNT_PERCENT=55
+NUCLEOTIDE_COUNT_LONG_SEQUENCE_AMBIGUITY_PERCENT=95
+NUCLEOTIDE_COUNT_SHORT=100
+NUCLEOTIDE_COUNT_VERY_SHORT=4