JAL-3116 clear out whitespace when materialising Sequence object from Uniprot XML
author Jim Procter <jprocter@issues.jalview.org>
Thu, 17 Jan 2019 12:29:06 +0000 (12:29 +0000)
committer Jim Procter <jprocter@issues.jalview.org>
Thu, 17 Jan 2019 12:29:06 +0000 (12:29 +0000)
src/jalview/ws/dbsources/Uniprot.java
test/jalview/ws/dbsources/UniprotTest.java

index 86282c7..d97bf69 100644 (file)
@@ -178,16 +178,12 @@ public class Uniprot extends DbSourceProxyImpl
   SequenceI uniprotEntryToSequence(Entry entry)
   {
     String id = getUniprotEntryId(entry);
-    String seqString = entry.getSequence().getValue();
-
     /*
-     * for backwards compatibility with Castor processing,
-     * remove any internal spaces
+     * Sequence should not include any whitespace, but JAXB leaves these in
      */
-    if (seqString.indexOf(' ') > -1)
-    {
-      seqString = seqString.replace(" ", "");
-    }
+    String seqString = entry.getSequence().getValue().replaceAll("\\s*",
+            "");
+
     SequenceI sequence = new Sequence(id,
             seqString);
     sequence.setDescription(getUniprotEntryDescription(entry));
index e835724..86f5602 100644 (file)
@@ -216,6 +216,8 @@ public class UniprotTest
     SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
     assertNotNull(seq);
     assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL
+    assertEquals(seq.getSequenceAsString(),
+            seq.createDatasetSequence().getSequenceAsString());
 
   }