JAL-4219 test and patch to allow RNA dot-bracket SS lines to be appended to fasta... features/JAL-4219_extended_fasta_rna_ss
author James Procter <j.procter@dundee.ac.uk>
Fri, 14 Jul 2023 00:37:47 +0000 (01:37 +0100)
committer James Procter <j.procter@dundee.ac.uk>
Fri, 14 Jul 2023 00:37:47 +0000 (01:37 +0100)
src/jalview/io/FastaFile.java
test/jalview/io/FastaFileTest.java [new file with mode: 0644]

index c698a31..a01e1db 100755 (executable)
@@ -22,6 +22,9 @@ package jalview.io;
 
 import java.io.IOException;
 
+import com.stevesoft.pat.Regex;
+
+import jalview.analysis.Rna;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.Annotation;
@@ -81,7 +84,8 @@ public class FastaFile extends AlignFile
   {
     super(seqs);
   }
-
+  private static final Regex NOT_RNASS = new Regex(
+          "^[^<>[\\](){}ADFJ-RUVWYZadfj-ruvwyz]*$");
   /**
    * DOCUMENT ME!
    * 
@@ -96,9 +100,10 @@ public class FastaFile extends AlignFile
 
     String line, uline;
     Sequence seq = null;
+    SequenceI aseqref = null;
 
     boolean annotation = false;
-
+    boolean rnaAnnot=false;
     while ((uline = nextLine()) != null)
     {
       line = uline.trim();
@@ -110,28 +115,31 @@ public class FastaFile extends AlignFile
           {
             if (annotation)
             {
-              annotations.addElement(makeAnnotation(seq, sb));
+              annotations.addElement(makeAnnotation(seq, sb,aseqref));
             }
           }
           else
           {
             annotation = false;
           }
-
           if (!firstLine)
           {
             seq.setSequence(sb.toString());
 
-            if (!annotation)
+            if (!(annotation||rnaAnnot))
             {
               seqs.addElement(seq);
+            } else {
+              AlignmentAnnotation alan = makeAnnotation(seq, sb, aseqref);
+              annotations.addElement(alan);              
             }
           }
 
           seq = parseId(line.substring(1));
           firstLine = false;
-
-          sb = new StringBuffer();
+          rnaAnnot=false;
+          aseqref=null;
+          sb.setLength(0);
 
           if (line.startsWith(">#_"))
           {
@@ -140,14 +148,32 @@ public class FastaFile extends AlignFile
         }
         else
         {
-          sb.append(annotation ? uline : line);
+          // check if we have rna annotation for the preceding sequence data
+          if (!annotation) {
+            if (!firstLine && !NOT_RNASS.search(uline) && uline.indexOf('(')>-1 && uline.indexOf('(')<uline.indexOf(')'))
+            {
+              if (!rnaAnnot)
+              {
+                seq.setSequence(sb.toString());
+                sb.setLength(0);
+                seqs.addElement(seq);
+                aseqref = seq;
+                seq =new Sequence("Secondary Structure","");
+                seq.setDescription("");
+                rnaAnnot=true;
+                annotation=true;
+              }
+            }
+          }
+          
+          sb.append((annotation || rnaAnnot) ? uline : line);           
         }
       }
     }
 
     if (annotation)
     {
-      annotations.addElement(makeAnnotation(seq, sb));
+      annotations.addElement(makeAnnotation(seq, sb,aseqref));
     }
 
     else if (!firstLine)
@@ -157,20 +183,29 @@ public class FastaFile extends AlignFile
     }
   }
 
-  private AlignmentAnnotation makeAnnotation(SequenceI seq, StringBuffer sb)
+  private AlignmentAnnotation makeAnnotation(SequenceI seq, StringBuffer sb,
+          SequenceI aseqref)
   {
     Annotation[] anots = new Annotation[sb.length()];
     char cb;
+
     for (int i = 0; i < anots.length; i++)
     {
       char cn = sb.charAt(i);
       if (cn != ' ')
       {
-        anots[i] = new Annotation("" + cn, null, ' ', Float.NaN);
+        anots[i] =  new Annotation(aseqref != null ? "" : "" + cn, null, aseqref != null ? cn
+                : ' ', Float.NaN);
       }
     }
     AlignmentAnnotation aa = new AlignmentAnnotation(
-            seq.getName().substring(2), seq.getDescription(), anots);
+            aseqref == null ? seq.getName().substring(2) : seq.getName(),
+            seq.getDescription(), anots);
+    if (aseqref != null)
+    {
+      aa.sequenceRef = aseqref;
+      aseqref.addAlignmentAnnotation(aa);
+    }
     return aa;
   }
 
diff --git a/test/jalview/io/FastaFileTest.java b/test/jalview/io/FastaFileTest.java
new file mode 100644 (file)
index 0000000..bf60492
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+
+import org.testng.annotations.Test;
+
+public class FastaFileTest
+{
+  @Test(groups = "Functional")
+  public void testParse_rnaFasta() throws IOException
+  {
+    String rnaseq = "gGGGGCCACAGCAGAAGCGUUCACGUCGCAGCCCCUGUCAGCCAUUGCACUCCGGCUGCGAAUUCUGCU",
+            rnastruct = "[[[[[[...((((((((((.......))).]]]]]]..(((((..........)))))....)))))))";
+    //@formatter:off
+    String rna_fasta = ">strand_B\n"
+            + rnaseq+"\n"
+            + rnastruct+"\n";
+    //@formatter:on
+    FastaFile cf = new FastaFile(rna_fasta, DataSourceType.PASTE);
+    SequenceI[] seqs = cf.getSeqsAsArray();
+    AlignmentAnnotation[] aa = cf.annotations.toArray(new AlignmentAnnotation[1]);
+    assertEquals(seqs.length, 1);
+    assertEquals(seqs[0].getName(), "strand_B");
+    assertEquals(seqs[0].getStart(), 1);
+    assertEquals(seqs[0].getEnd(), 69);
+    assertTrue(seqs[0].getSequenceAsString().endsWith("UUCUGCU"));
+    assertTrue(seqs[0].getAnnotation()!=null);
+    assertEquals(seqs[0].getAnnotation().length,1);
+    assertEquals(seqs[0].getAnnotation()[0].getRNAStruc(),rnastruct);
+  }
+}