Merge branch 'alpha/JAL-3362_Jalview_212_alpha' into alpha/merge_212_JalviewJS_2112
[jalview.git] / test / jalview / io / StockholmFileTest.java
index a6ae630..bb822a0 100644 (file)
  */
 package jalview.io;
 
+import static org.testng.Assert.assertTrue;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertTrue;
 import static org.testng.AssertJUnit.fail;
 
-import jalview.datamodel.AlignmentAnnotation;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.Annotation;
-import jalview.datamodel.SequenceFeature;
-import jalview.datamodel.SequenceI;
-import jalview.gui.JvOptionPane;
-
 import java.io.File;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.gui.JvOptionPane;
+import jalview.util.DBRefUtils;
+
 public class StockholmFileTest
 {
 
@@ -91,14 +98,57 @@ public class StockholmFileTest
   }
 
   /**
+   * JAL-3529 - verify that the UNIPROT accession of a sequence carrying both
+   * UNIPROT and PFAM refs is written as a #=GS AC line and survives re-import
+   */
+  @Test(groups = { "Functional" })
+  public void dbrefOutput() throws Exception
+  {
+    // build a sequence holding one UNIPROT ref and two PFAM refs, mimicking a
+    // sequence retrieved as part of a Pfam domain alignment
+    SequenceI sq = new Sequence("FER2_SPIOL", "AASSDDDFFF");
+    sq.addDBRef(new DBRefEntry("UNIPROT", "1", "P00224"));
+    sq.addDBRef(new DBRefEntry("PFAM", "1", "P00224.1"));
+    sq.addDBRef(new DBRefEntry("PFAM", "1", "PF00111"));
+    AppletFormatAdapter af = new AppletFormatAdapter();
+    String toStockholm = af.formatSequences(FileFormat.Stockholm,
+            new Alignment(new SequenceI[]
+            { sq }), false);
+    System.out.println(toStockholm);
+    // MULTILINE makes ^ and $ anchor on the #=GS line within the whole output
+    assertTrue(
+            Pattern.compile(
+                    "^#=GS\\s+FER2_SPIOL(/\\d+-\\d+)?\\s+AC\\s+P00224$",
+                    Pattern.MULTILINE).matcher(toStockholm)
+                    .find(),
+            "Couldn't locate UNIPROT Accession in generated Stockholm file.");
+    AlignmentI fromStockholm = af.readFile(toStockholm,
+            DataSourceType.PASTE, FileFormat.Stockholm);
+    SequenceI importedSeq = fromStockholm.getSequenceAt(0);
+    assertTrue(importedSeq.getDBRefs()
+            .size() == 1,
+            "Expected just one database reference to be added to sequence.");
+    assertTrue(
+            importedSeq.getDBRefs().get(0).getAccessionId().indexOf(
+                    " ") == -1,
+            "Spaces were found in accession ID.");
+    List<DBRefEntry> dbrefs = DBRefUtils.searchRefs(importedSeq.getDBRefs(),
+            "P00224");
+    assertTrue(dbrefs.size() == 1,
+            "Couldn't find Uniprot DBRef on re-imported sequence.");
+
+  }
+
+  /**
    * test alignment data in given file can be imported, exported and reimported
    * with no dataloss
    * 
    * @param f
-   *          - source datafile (IdentifyFile.identify() should work with it)
+   *                               - source datafile (IdentifyFile.identify()
+   *                               should work with it)
    * @param ioformat
-   *          - label for IO class used to write and read back in the data from
-   *          f
+   *                               - label for IO class used to write and read
+   *                               back in the data from f
    * @param ignoreFeatures
    * @param ignoreRowVisibility
    * @param allowNullAnnotations
@@ -255,8 +305,8 @@ public class StockholmFileTest
           assertEquals("Threshold line not identical.",
                   aa_original[i].threshold, aa_new[i].threshold);
           // graphGroup may differ, but pattern should be the same
-          Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2);
-          Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2);
+          Integer o_ggrp = Integer.valueOf(aa_original[i].graphGroup + 2);
+          Integer n_ggrp = Integer.valueOf(aa_new[i].graphGroup + 2);
           BitSet orig_g = orig_groups.get(o_ggrp);
           BitSet new_g = new_groups.get(n_ggrp);
           if (orig_g == null)
@@ -321,10 +371,11 @@ public class StockholmFileTest
                           || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
                                   .getSequenceFeatures() != null));
           // compare sequence features
-          if (seq_original[i].getSequenceFeatures() != null
+          if (!ignoreFeatures
+                  && seq_original[i].getSequenceFeatures() != null
                   && seq_new[in].getSequenceFeatures() != null)
           {
-            System.out.println("There are feature!!!");
+            System.out.println("Checking feature equivalence.");
             sequenceFeatures_original = seq_original[i]
                     .getSequenceFeatures();
             sequenceFeatures_new = seq_new[in].getSequenceFeatures();
@@ -658,7 +709,7 @@ public class StockholmFileTest
 
   // this is the single sequence alignment and the SS annotations equivalent to
   // the ones in file RnaSSTestFile
-  String aliFileRnaSSAlphaChars = ">Test.sequence/1-14\n"
+  String aliFileRnaSS = ">Test.sequence/1-14\n"
           + "GUACAAAAAAAAAA";
   String annFileRnaSSAlphaChars = "JALVIEW_ANNOTATION\n"
           + "# Created: Thu Aug 02 14:54:57 BST 2018\n" + "\n"
@@ -700,7 +751,7 @@ public class StockholmFileTest
 
     // this should result in the same RNA SS Annotations
     AlignmentI newAl = new AppletFormatAdapter().readFile(
-            aliFileRnaSSAlphaChars,
+            aliFileRnaSS,
             DataSourceType.PASTE, jalview.io.FileFormat.Fasta);
     AnnotationFile aaf = new AnnotationFile();
     aaf.readAnnotationFile(newAl, annFileRnaSSAlphaChars,
@@ -708,11 +759,14 @@ public class StockholmFileTest
 
     Assert.assertTrue(
             testRnaSSAnnotationsEquivalent(al.getAlignmentAnnotation()[0],
-                    newAl.getAlignmentAnnotation()[0]));
+                    newAl.getAlignmentAnnotation()[0]),
+            "RNA SS Annotations SHOULD be pair-wise equivalent (but apparently aren't): \n"
+                    + "RNA SS A 1:" + al.getAlignmentAnnotation()[0] + "\n"
+                    + "RNA SS A 2:" + newAl.getAlignmentAnnotation()[0]);
 
     // this should NOT result in the same RNA SS Annotations
     newAl = new AppletFormatAdapter().readFile(
-            aliFileRnaSSAlphaChars, DataSourceType.PASTE,
+            aliFileRnaSS, DataSourceType.PASTE,
             jalview.io.FileFormat.Fasta);
     aaf = new AnnotationFile();
     aaf.readAnnotationFile(newAl, wrongAnnFileRnaSSAlphaChars,
@@ -720,7 +774,10 @@ public class StockholmFileTest
 
     boolean mismatch = testRnaSSAnnotationsEquivalent(al.getAlignmentAnnotation()[0],
             newAl.getAlignmentAnnotation()[0]);
-    Assert.assertFalse( mismatch );
+    Assert.assertFalse(mismatch,
+            "RNA SS Annotations SHOULD NOT be pair-wise equivalent (but apparently are): \n"
+                    + "RNA SS A 1:" + al.getAlignmentAnnotation()[0] + "\n"
+                    + "RNA SS A 2:" + newAl.getAlignmentAnnotation()[0]);
   }
 
   private static boolean testRnaSSAnnotationsEquivalent(
@@ -730,4 +787,110 @@ public class StockholmFileTest
     return a1.rnaSecondaryStructureEquivalent(a2);
   }
 
+  String annFileRnaSSWithSpaceChars = "JALVIEW_ANNOTATION\n"
+          + "# Created: Thu Aug 02 14:54:57 BST 2018\n" + "\n"
+          + "NO_GRAPH\tSecondary Structure\tSecondary Structure\t<,<|.,.|H,H| , |B,B|h,h| , |b,b|(,(|E,E|.,.|e,e|),)|>,>|\t2.0\n"
+          + "\n"
+          + "ROWPROPERTIES\tSecondary Structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false\n"
+          + "\n" + "\n" + "ALIGNMENT\tID=RNA.SS.TEST\tTP=RNA;";
+  String annFileRnaSSWithoutSpaceChars = "JALVIEW_ANNOTATION\n"
+          + "# Created: Thu Aug 02 14:54:57 BST 2018\n" + "\n"
+          + "NO_GRAPH\tSecondary Structure\tSecondary Structure\t<,<|.,.|H,H|.,.|B,B|h,h|.,.|b,b|(,(|E,E|.,.|e,e|),)|>,>|\t2.0\n"
+          + "\n"
+          + "ROWPROPERTIES\tSecondary Structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false\n"
+          + "\n" + "\n" + "ALIGNMENT\tID=RNA.SS.TEST\tTP=RNA;";
+  // same row, but entries 4 and 7 hold Z and z instead of a space or a dot
+  String wrongAnnFileRnaSSWithoutSpaceChars = "JALVIEW_ANNOTATION\n"
+          + "# Created: Thu Aug 02 14:54:57 BST 2018\n" + "\n"
+          + "NO_GRAPH\tSecondary Structure\tSecondary Structure\t<,<|.,.|H,H|Z,Z|B,B|h,h|z,z|b,b|(,(|E,E|.,.|e,e|),)|>,>|\t2.0\n"
+          + "\n"
+          + "ROWPROPERTIES\tSecondary Structure\tscaletofit=true\tshowalllabs=true\tcentrelabs=false\n"
+          + "\n" + "\n" + "ALIGNMENT\tID=RNA.SS.TEST\tTP=RNA;";
+  /** RNA SS rows using spaces or dots parse alike; export has no spaces. */
+  @Test(groups = { "Functional" })
+  public void stockholmFileRnaSSSpaceChars() throws Exception
+  {
+    AlignmentI alWithSpaces = new AppletFormatAdapter().readFile(
+            aliFileRnaSS, DataSourceType.PASTE,
+            jalview.io.FileFormat.Fasta);
+    AnnotationFile afWithSpaces = new AnnotationFile();
+    afWithSpaces.readAnnotationFile(alWithSpaces,
+            annFileRnaSSWithSpaceChars, DataSourceType.PASTE);
+
+    Iterable<AlignmentAnnotation> aaiWithSpaces = alWithSpaces
+            .findAnnotations(null, null, "Secondary Structure");
+    AlignmentAnnotation aaWithSpaces = aaiWithSpaces.iterator().next();
+    Assert.assertTrue(aaWithSpaces.isRNA(),
+            "'" + aaWithSpaces + "' not recognised as RNA SS");
+    Assert.assertTrue(aaWithSpaces.isValidStruc(),
+            "'" + aaWithSpaces + "' not recognised as valid structure");
+    Annotation[] annWithSpaces = aaWithSpaces.annotations;
+    char[] As = new char[annWithSpaces.length];
+    for (int i = 0; i < annWithSpaces.length; i++)
+    {
+      As[i] = annWithSpaces[i].secondaryStructure;
+    }
+    // both space and dot input symbols must be held as spaces once parsed
+    char[] shouldBe = { '<', ' ', 'H', ' ', 'B', 'h', ' ', 'b', '(', 'E',
+        ' ', 'e', ')', '>' };
+    Assert.assertTrue(Arrays.equals(As, shouldBe), "Annotation is "
+            + new String(As) + " but should be " + new String(shouldBe));
+
+    // the same row written with dots instead of spaces must be equivalent
+    AlignmentI alWithoutSpaces = new AppletFormatAdapter().readFile(
+            aliFileRnaSS, DataSourceType.PASTE,
+            jalview.io.FileFormat.Fasta);
+    AnnotationFile afWithoutSpaces = new AnnotationFile();
+    afWithoutSpaces.readAnnotationFile(alWithoutSpaces,
+            annFileRnaSSWithoutSpaceChars,
+            DataSourceType.PASTE);
+
+    Assert.assertTrue(
+            testRnaSSAnnotationsEquivalent(
+                    alWithSpaces.getAlignmentAnnotation()[0],
+                    alWithoutSpaces.getAlignmentAnnotation()[0]),
+            "RNA SS Annotations SHOULD be pair-wise equivalent (but apparently aren't): \n"
+                    + "RNA SS A 1:"
+                    + alWithSpaces.getAlignmentAnnotation()[0]
+                            .getRnaSecondaryStructure()
+                    + "\n" + "RNA SS A 2:"
+                    + alWithoutSpaces.getAlignmentAnnotation()[0]
+                            .getRnaSecondaryStructure());
+
+    // a row holding Z and z in place of the blanks must NOT be equivalent
+    AlignmentI wrongAlWithoutSpaces = new AppletFormatAdapter().readFile(
+            aliFileRnaSS, DataSourceType.PASTE,
+            jalview.io.FileFormat.Fasta);
+    AnnotationFile wrongAfWithoutSpaces = new AnnotationFile();
+    wrongAfWithoutSpaces.readAnnotationFile(wrongAlWithoutSpaces,
+            wrongAnnFileRnaSSWithoutSpaceChars,
+            DataSourceType.PASTE);
+
+    Assert.assertFalse(
+            testRnaSSAnnotationsEquivalent(
+                    alWithSpaces.getAlignmentAnnotation()[0],
+                    wrongAlWithoutSpaces.getAlignmentAnnotation()[0]),
+            "RNA SS Annotations SHOULD NOT be pair-wise equivalent (but apparently are): \n"
+                    + "RNA SS A 1:"
+                    + alWithSpaces.getAlignmentAnnotation()[0]
+                            .getRnaSecondaryStructure()
+                    + "\n" + "RNA SS A 2:"
+                    + wrongAlWithoutSpaces.getAlignmentAnnotation()[0]
+                            .getRnaSecondaryStructure());
+
+    // Stockholm output: SS_cons must be 14 contiguous non-whitespace symbols
+    // TODO: create a better 'save as <format>' pattern
+    alWithSpaces.getAlignmentAnnotation()[0].visible = true;
+    StockholmFile sf = new StockholmFile(alWithSpaces);
+
+    String stockholmFile = sf.print(alWithSpaces.getSequencesArray(), true);
+    Pattern noSpacesInRnaSSAnnotation = Pattern
+            .compile("\\n#=GC SS_cons\\s+\\S{14}\\n");
+    Matcher m = noSpacesInRnaSSAnnotation.matcher(stockholmFile);
+    boolean matches = m.find();
+    Assert.assertTrue(matches,
+            "StockholmFile output does not contain expected output (may contain spaces):\n"
+                    + stockholmFile);
+
+  }
 }