JAL-2113 additions to embl_mapping.xml / data models / tests (branch: features/JAL-2113_emblXml1.2)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 26 May 2016 09:27:02 +0000 (10:27 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 26 May 2016 09:27:02 +0000 (10:27 +0100)
resources/embl_mapping.xml
src/jalview/datamodel/FeatureProperties.java
src/jalview/datamodel/xdb/embl/EmblEntry.java
test/jalview/datamodel/xdb/embl/EmblEntryTest.java
test/jalview/datamodel/xdb/embl/EmblFileTest.java
test/jalview/datamodel/xdb/embl/EmblTestHelper.java

index da1bba4..01b921a 100644 (file)
                <field name="accession" type="string">
                        <bind-xml name="accession" node="attribute"/>
                </field>
-               <field name="lastUpdated" type="string">
-                       <bind-xml name="lastUpdated" node="attribute"/>
-               </field>
                <!-- 
-                   in EMBL XML 1.2 sequence/@version becomes entry/version 
-                   entry/@version becomes entry/@entryVersion
+                   in EMBL XML 1.2 sequence/@version became entry/version 
+                   entry/@version became entry/@entryVersion
                -->
                <field name="sequenceVersion" type="string">
                        <bind-xml name="version" node="attribute"/>
                <field name="entryVersion" type="string">
                        <bind-xml name="entryVersion" node="attribute"/>
                </field>
+               <field name="dataClass" type="string">
+                       <bind-xml name="dataClass" node="attribute"/>
+               </field>
+               <field name="taxonomicDivision" type="string">
+                       <bind-xml name="taxonomicDivision" node="attribute"/>
+               </field>
                <field name="moleculeType" type="string">
                        <bind-xml name="moleculeType" node="attribute"/>
                </field>
                <field name="topology" type="string">
                        <bind-xml name="topology" node="attribute" location="type"/>
                </field>
-               <field name="rCreated" type="string">
-                       <bind-xml name="releaseCreated" node="attribute"/>
+               <field name="firstPublicDate" type="string">
+                       <bind-xml name="firstPublic" node="attribute"/>
+               </field>
+               <field name="firstPublicRelease" type="string">
+                       <bind-xml name="firstPublicRelease" node="attribute"/>
+               </field>
+               <field name="lastUpdatedDate" type="string">
+                       <bind-xml name="lastUpdated" node="attribute"/>
                </field>
-               <field name="rLastUpdated" type="string">
-                       <bind-xml name="releaseLastUpdated" node="attribute"/>
+               <field name="lastUpdatedRelease" type="string">
+                       <bind-xml name="lastUpdatedRelease" node="attribute"/>
                </field>
-               <field name="desc" type="string">
+               <field name="description" type="string">
                        <bind-xml name="description" node="element"/>
                </field>
                <field name="keywords" type="string" collection="vector">
index d25eb96..2306bec 100644 (file)
@@ -28,8 +28,7 @@ package jalview.datamodel;
  */
 public class FeatureProperties
 {
-
-  private static final String EMBL_CODING_FEATURE = "CDS";
+  public static final String EMBL_CODING_FEATURE = "CDS";
 
   public static final String EXONPOS = "exon number";
 
index 50a262f..cfe87d9 100644 (file)
@@ -62,21 +62,25 @@ public class EmblEntry
 
   String sequenceVersion;
 
+  String dataClass;
+
   String moleculeType;
 
   String topology;
 
   String sequenceLength;
 
-  String taxDivision;
+  String taxonomicDivision;
+
+  String description;
 
-  String desc;
+  String firstPublicDate;
 
-  String rCreated;
+  String firstPublicRelease;
 
-  String rLastUpdated;
+  String lastUpdatedDate;
 
-  String lastUpdated;
+  String lastUpdatedRelease;
 
   Vector<String> keywords;
 
@@ -121,23 +125,6 @@ public class EmblEntry
   }
 
   /**
-   * @return the desc
-   */
-  public String getDesc()
-  {
-    return desc;
-  }
-
-  /**
-   * @param desc
-   *          the desc to set
-   */
-  public void setDesc(String desc)
-  {
-    this.desc = desc;
-  }
-
-  /**
    * @return the features
    */
   public Vector<EmblFeature> getFeatures()
@@ -172,57 +159,6 @@ public class EmblEntry
   }
 
   /**
-   * @return the lastUpdated
-   */
-  public String getLastUpdated()
-  {
-    return lastUpdated;
-  }
-
-  /**
-   * @param lastUpdated
-   *          the lastUpdated to set
-   */
-  public void setLastUpdated(String lastUpdated)
-  {
-    this.lastUpdated = lastUpdated;
-  }
-
-  /**
-   * @return the releaseCreated
-   */
-  public String getRCreated()
-  {
-    return rCreated;
-  }
-
-  /**
-   * @param releaseCreated
-   *          the releaseCreated to set
-   */
-  public void setRCreated(String releaseCreated)
-  {
-    this.rCreated = releaseCreated;
-  }
-
-  /**
-   * @return the releaseLastUpdated
-   */
-  public String getRLastUpdated()
-  {
-    return rLastUpdated;
-  }
-
-  /**
-   * @param releaseLastUpdated
-   *          the releaseLastUpdated to set
-   */
-  public void setRLastUpdated(String releaseLastUpdated)
-  {
-    this.rLastUpdated = releaseLastUpdated;
-  }
-
-  /**
    * @return the sequence
    */
   public EmblSequence getSequence()
@@ -240,40 +176,6 @@ public class EmblEntry
   }
 
   /**
-   * @return the taxDivision
-   */
-  public String getTaxDivision()
-  {
-    return taxDivision;
-  }
-
-  /**
-   * @param taxDivision
-   *          the taxDivision to set
-   */
-  public void setTaxDivision(String taxDivision)
-  {
-    this.taxDivision = taxDivision;
-  }
-
-  /**
-   * @return the entry version
-   */
-  public String getEntryVersion()
-  {
-    return entryVersion;
-  }
-
-  /**
-   * @param version
-   *          the version to set
-   */
-  public void setEntryVersion(String version)
-  {
-    this.entryVersion = version;
-  }
-
-  /**
    * Recover annotated sequences from EMBL file
    * 
    * @param sourceDb
@@ -285,7 +187,7 @@ public class EmblEntry
   {
     SequenceI dna = new Sequence(sourceDb + "|" + accession,
             sequence.getSequence());
-    dna.setDescription(desc);
+    dna.setDescription(description);
     DBRefEntry retrievedref = new DBRefEntry(sourceDb,
             getSequenceVersion(), accession);
     dna.addDBRef(retrievedref);
@@ -780,6 +682,26 @@ public class EmblEntry
     this.sequenceVersion = sequenceVersion;
   }
 
+  public String getSequenceLength()
+  {
+    return sequenceLength;
+  }
+
+  public void setSequenceLength(String sequenceLength)
+  {
+    this.sequenceLength = sequenceLength;
+  }
+
+  public String getEntryVersion()
+  {
+    return entryVersion;
+  }
+
+  public void setEntryVersion(String entryVersion)
+  {
+    this.entryVersion = entryVersion;
+  }
+
   public String getMoleculeType()
   {
     return moleculeType;
@@ -800,33 +722,73 @@ public class EmblEntry
     this.topology = topology;
   }
 
-  public String getSequenceLength()
+  public String getTaxonomicDivision()
   {
-    return sequenceLength;
+    return taxonomicDivision;
   }
 
-  public void setSequenceLength(String sequenceLength)
+  public void setTaxonomicDivision(String taxonomicDivision)
   {
-    this.sequenceLength = sequenceLength;
+    this.taxonomicDivision = taxonomicDivision;
+  }
+
+  public String getDescription()
+  {
+    return description;
+  }
+
+  public void setDescription(String description)
+  {
+    this.description = description;
+  }
+
+  public String getFirstPublicDate()
+  {
+    return firstPublicDate;
+  }
+
+  public void setFirstPublicDate(String firstPublicDate)
+  {
+    this.firstPublicDate = firstPublicDate;
+  }
+
+  public String getFirstPublicRelease()
+  {
+    return firstPublicRelease;
+  }
+
+  public void setFirstPublicRelease(String firstPublicRelease)
+  {
+    this.firstPublicRelease = firstPublicRelease;
+  }
+
+  public String getLastUpdatedDate()
+  {
+    return lastUpdatedDate;
+  }
+
+  public void setLastUpdatedDate(String lastUpdatedDate)
+  {
+    this.lastUpdatedDate = lastUpdatedDate;
   }
 
-  public String getrCreated()
+  public String getLastUpdatedRelease()
   {
-    return rCreated;
+    return lastUpdatedRelease;
   }
 
-  public void setrCreated(String rCreated)
+  public void setLastUpdatedRelease(String lastUpdatedRelease)
   {
-    this.rCreated = rCreated;
+    this.lastUpdatedRelease = lastUpdatedRelease;
   }
 
-  public String getrLastUpdated()
+  public String getDataClass()
   {
-    return rLastUpdated;
+    return dataClass;
   }
 
-  public void setrLastUpdated(String rLastUpdated)
+  public void setDataClass(String dataClass)
   {
-    this.rLastUpdated = rLastUpdated;
+    this.dataClass = dataClass;
   }
 }
index c36b7d3..e8760bd 100644 (file)
@@ -1,7 +1,9 @@
 package jalview.datamodel.xdb.embl;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertSame;
 
+import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 
@@ -36,17 +38,79 @@ public class EmblEntryTest
     SequenceI dna = new Sequence("J03321", "GGATCCGTAAGTTAGACGAAATT");
     List<SequenceI> peptides = new ArrayList<SequenceI>();
     EmblFile ef = EmblTestHelper.getEmblFile();
-    EmblFeature feature = null;
-    for (EmblFeature feat : ef.getEntries().get(0).getFeatures())
+
+    /*
+     * parse two CDS features, one with two Uniprot cross-refs,
+     * the other with one
+     */
+    EmblEntry testee = new EmblEntry();
+    for (EmblFeature feature : ef.getEntries().get(0).getFeatures())
     {
-      if ("CDS".equals(feat.getName()))
+      if ("CDS".equals(feature.getName()))
       {
-        feature = feat;
-        break;
+        testee.parseCodingFeature(feature, "EMBL", dna, peptides);
       }
     }
 
-    EmblEntry testee = new EmblEntry();
-    testee.parseCodingFeature(feature, "EMBL", dna, peptides);
+    /*
+     * peptides should now have five entries:
+     * EMBL product and two Uniprot accessions for the first CDS / translation
+     * EMBL product and one Uniprot accession for the second CDS / translation
+     */
+    assertEquals(5, peptides.size());
+    assertEquals("CAA30420.1", peptides.get(0).getName());
+    assertEquals("MLCF", peptides.get(0).getSequenceAsString());
+    assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName());
+    assertEquals("MLCF", peptides.get(1).getSequenceAsString());
+    assertEquals("UNIPROT|P0CE20", peptides.get(2).getName());
+    assertEquals("MLCF", peptides.get(2).getSequenceAsString());
+    assertEquals("CAA30421.1", peptides.get(3).getName());
+    assertEquals("MSSS", peptides.get(3).getSequenceAsString());
+    assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName());
+    assertEquals("MSSS", peptides.get(4).getSequenceAsString());
+
+    /*
+     * verify dna sequence has dbrefs with mappings to the peptide 'products'
+     */
+    DBRefEntry[] dbrefs = dna.getDBRefs();
+    assertEquals(3, dbrefs.length);
+    DBRefEntry dbRefEntry = dbrefs[0];
+    assertEquals("UNIPROT", dbRefEntry.getSource());
+    assertEquals("B0BCM4", dbRefEntry.getAccessionId());
+    assertSame(peptides.get(1), dbRefEntry.getMap().getTo());
+    List<int[]> fromRanges = dbRefEntry.getMap().getMap().getFromRanges();
+    assertEquals(1, fromRanges.size());
+    assertEquals(57, fromRanges.get(0)[0]);
+    assertEquals(46, fromRanges.get(0)[1]);
+    List<int[]> toRanges = dbRefEntry.getMap().getMap().getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(1, toRanges.get(0)[0]);
+    assertEquals(4, toRanges.get(0)[1]);
+
+    dbRefEntry = dbrefs[1];
+    assertEquals("UNIPROT", dbRefEntry.getSource());
+    assertEquals("P0CE20", dbRefEntry.getAccessionId());
+    assertSame(peptides.get(2), dbRefEntry.getMap().getTo());
+    fromRanges = dbRefEntry.getMap().getMap().getFromRanges();
+    assertEquals(1, fromRanges.size());
+    assertEquals(57, fromRanges.get(0)[0]);
+    assertEquals(46, fromRanges.get(0)[1]);
+    toRanges = dbRefEntry.getMap().getMap().getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(1, toRanges.get(0)[0]);
+    assertEquals(4, toRanges.get(0)[1]);
+
+    dbRefEntry = dbrefs[2];
+    assertEquals("UNIPROT", dbRefEntry.getSource());
+    assertEquals("B0BCM3", dbRefEntry.getAccessionId());
+    assertSame(peptides.get(4), dbRefEntry.getMap().getTo());
+    fromRanges = dbRefEntry.getMap().getMap().getFromRanges();
+    assertEquals(1, fromRanges.size());
+    assertEquals(4, fromRanges.get(0)[0]);
+    assertEquals(15, fromRanges.get(0)[1]);
+    toRanges = dbRefEntry.getMap().getMap().getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(1, toRanges.get(0)[0]);
+    assertEquals(4, toRanges.get(0)[1]);
   }
 }
index a62cb87..6955833 100644 (file)
@@ -39,28 +39,31 @@ public class EmblFileTest
     assertEquals(1, entries.size());
     EmblEntry entry = entries.get(0);
 
-    assertEquals("X53828", entry.getAccession());
-    assertEquals(
-            "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)",
-            entry.getDesc());
-    assertEquals("2005-04-18", entry.getLastUpdated());
-    assertEquals("mRNA", entry.getMoleculeType());
+    assertEquals("X07547", entry.getAccession());
+    assertEquals("C. trachomatis plasmid", entry.getDescription());
+    assertEquals("STD", entry.getDataClass());
+    assertEquals("PRO", entry.getTaxonomicDivision());
+    assertEquals("1999-02-10", entry.getLastUpdatedDate());
+    assertEquals("58", entry.getLastUpdatedRelease());
+    assertEquals("1988-11-10", entry.getFirstPublicDate());
+    assertEquals("18", entry.getFirstPublicRelease());
+    assertEquals("genomic DNA", entry.getMoleculeType());
     assertEquals("1", entry.getSequenceVersion());
-    assertEquals("3", entry.getEntryVersion());
+    assertEquals("8", entry.getEntryVersion());
     assertEquals("linear", entry.getTopology());
-    assertEquals("1575", entry.getSequenceLength());
+    assertEquals("7499", entry.getSequenceLength());
 
     /*
      * FIXME these assertions fail - values are null - why?? Adding or removing
      * attributes in the test XML modifies behaviour. eg. inserting an attribute
      * _before_ lastUpdated results in a null value in this field.
      */
-    // assertEquals("25", entry.getRCreated());
-    // assertEquals("83", entry.getRLastUpdated());
+    assertEquals("1988-11-10", entry.getFirstPublicDate());
+    assertEquals("18", entry.getFirstPublicRelease());
 
     assertEquals(2, entry.getKeywords().size());
-    assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0));
-    assertEquals("chutney", entry.getKeywords().get(1));
+    assertEquals("plasmid", entry.getKeywords().get(0));
+    assertEquals("unidentified reading frame", entry.getKeywords().get(1));
 
     /*
      * dbrefs
@@ -68,52 +71,81 @@ public class EmblFileTest
     assertEquals(2, entry.getDbRefs().size());
     DBRefEntry dbref = entry.getDbRefs().get(0);
     assertEquals("EuropePMC", dbref.getSource());
-    assertEquals("PMC1460223", dbref.getAccessionId());
-    assertEquals("9649548", dbref.getVersion());
+    assertEquals("PMC107176", dbref.getAccessionId());
+    assertEquals("9573186", dbref.getVersion());
     dbref = entry.getDbRefs().get(1);
     assertEquals("MD5", dbref.getSource());
-    assertEquals("d3b68", dbref.getAccessionId());
+    assertEquals("ac73317", dbref.getAccessionId());
     // blank version has been converted to "0"
     assertEquals("0", dbref.getVersion());
 
     /*
-     * sequence feature for CDS
+     * two sequence features for CDS
+     */
+    assertEquals(2, entry.getFeatures().size());
+    /*
+     * first CDS
      */
-    assertEquals(1, entry.getFeatures().size());
     EmblFeature ef = entry.getFeatures().get(0);
     assertEquals("CDS", ef.getName());
-    assertEquals("60..1058", ef.getLocation());
+    assertEquals("complement(46..57)", ef.getLocation());
     assertEquals(2, ef.getDbRefs().size());
     dbref = ef.getDbRefs().get(0);
-    assertEquals("GOA", dbref.getSource());
-    assertEquals("P00340", dbref.getAccessionId());
+    assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+    assertEquals("B0BCM4", dbref.getAccessionId());
     assertEquals("2.1", dbref.getVersion());
     dbref = ef.getDbRefs().get(1);
-    assertEquals("InterPro", dbref.getSource());
-    assertEquals("IPR001236", dbref.getAccessionId());
-    // blank version converted to "0":
+    assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+    assertEquals("P0CE20", dbref.getAccessionId());
+    // blank version gets converted to "0":
     assertEquals("0", dbref.getVersion());
-    assertEquals(2, ef.getQualifiers().size());
-
-    // feature qualifiers
+    // CDS feature qualifiers
+    assertEquals(3, ef.getQualifiers().size());
     Qualifier q = ef.getQualifiers().get(0);
     assertEquals("note", q.getName());
     assertEquals(2, q.getValues().length);
-    assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]);
+    assertEquals("ORF 8 (AA 1-330)", q.getValues()[0]);
     assertEquals("pickle", q.getValues()[1]);
     assertNull(q.getEvidence());
     q = ef.getQualifiers().get(1);
+    assertEquals("protein_id", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("CAA30420.1", q.getValues()[0]);
+    q = ef.getQualifiers().get(2);
     assertEquals("translation", q.getName());
     assertEquals(1, q.getValues().length);
-    assertEquals("MSLKDHLIHN", q.getValues()[0]);
+    assertEquals("MLCF", q.getValues()[0]);
     assertEquals(1, q.getEvidence().length);
     assertEquals("Keith", q.getEvidence()[0]);
 
     /*
-     * Sequence
+     * second CDS
+     */
+    ef = entry.getFeatures().get(1);
+    assertEquals("CDS", ef.getName());
+    assertEquals("4..15", ef.getLocation());
+    assertEquals(1, ef.getDbRefs().size());
+    dbref = ef.getDbRefs().get(0);
+    assertEquals("UniProtKB/Swiss-Prot", dbref.getSource());
+    assertEquals("B0BCM3", dbref.getAccessionId());
+    assertEquals("0", dbref.getVersion());
+    assertEquals(2, ef.getQualifiers().size());
+    q = ef.getQualifiers().get(0);
+    assertEquals("protein_id", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("CAA30421.1", q.getValues()[0]);
+    q = ef.getQualifiers().get(1);
+    assertEquals("translation", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("MSSS", q.getValues()[0]);
+
+    /*
+     * Sequence - verify newline not converted to space (JAL-2029)
      */
     EmblSequence seq = entry.getSequence();
-    assertEquals("GTGACG", seq.getSequence());
+    assertEquals(
+            "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT",
+            seq.getSequence());
 
     /*
      * getSequence() converts empty DBRefEntry.version to "0"
index 71ca508..9957c72 100644 (file)
@@ -4,22 +4,47 @@ import java.io.StringReader;
 
 public class EmblTestHelper
 {
-  // adapted from http://www.ebi.ac.uk/ena/data/view/x53828&display=xml
+  // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml
+  // dna and translations truncated for convenience
   private static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
           + "<ROOT>"
-          + "<entry accession=\"X53828\" entryVersion=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\""
-          + " version=\"1\" moleculeType=\"mRNA\" topology=\"linear\" sequenceLength=\"1575\">"
-          + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
-          + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
-          + "<xref db=\"EuropePMC\" id=\"PMC1460223\" secondaryId=\"9649548\" />"
-          + "<xref db=\"MD5\" id=\"d3b68\" />"
-          + "<feature name=\"CDS\" location=\"60..1058\">"
-          + "<xref db=\"GOA\" id=\"P00340\" secondaryId=\"2.1\" /><xref db=\"InterPro\" id=\"IPR001236\" />"
-          + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
-          + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
-          // emulate EMBL XML 1.2 which splits sequence data every 60 characters
-          // see EmblSequence.setSequence
-          + "</feature>" + "<sequence>GTG\nACG</sequence></entry></ROOT>";
+          + "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\""
+          + " dataClass=\"STD\" taxonomicDivision=\"PRO\""
+          + " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\""
+          + " firstPublic=\"1988-11-10\" firstPublicRelease=\"18\""
+          + " lastUpdated=\"1999-02-10\" lastUpdatedRelease=\"58\">"
+          + "<secondaryAccession>X07574</secondaryAccession>"
+          + "<description>C. trachomatis plasmid</description>"
+          + "<keyword>plasmid</keyword><keyword>unidentified reading frame</keyword>"
+          + "<xref db=\"EuropePMC\" id=\"PMC107176\" secondaryId=\"9573186\" />"
+          + "<xref db=\"MD5\" id=\"ac73317\" />"
+          /*
+           * first CDS (range and translation changed to keep test data manageable)
+           */
+          + "<feature name=\"CDS\" location=\"complement(46..57)\">"
+          // test the case of >1 cross-ref to the same database (JAL-2029)
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM4\" secondaryId=\"2.1\" />"
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"P0CE20\" />"
+          + "<qualifier name=\"note\"><value>ORF 8 (AA 1-330)</value><value>pickle</value></qualifier>"
+          + "<qualifier name=\"protein_id\"><value>CAA30420.1</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MLCF</value><evidence>Keith</evidence></qualifier>"
+          + "</feature>"
+          /*
+           * second CDS (range and translation changed to keep test data manageable)
+           */
+          + "<feature name=\"CDS\" location=\"4..15\">"
+          + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM3\" />"
+          + "<qualifier name=\"protein_id\"><value>CAA30421.1</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MSSS</value></qualifier>"
+          + "</feature>"
+          /*
+           * sequence (modified for test purposes)
+           * emulates EMBL XML 1.2 which splits sequence data every 60 characters
+           * see EmblSequence.setSequence
+           */
+          + "<sequence>GGTATGTCCTCTAGTACAAAC\n"
+          + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT"
+          + "</sequence></entry></ROOT>";
 
   static EmblFile getEmblFile()
   {