From 786475501a15799d7c4058dbf74e4bf896d03736 Mon Sep 17 00:00:00 2001 From: James Procter Date: Fri, 14 Jul 2023 01:37:47 +0100 Subject: [PATCH] JAL-4219 test and patch to allow RNA dot-bracket SS lines to be appended to fasta sequence entries --- src/jalview/io/FastaFile.java | 59 ++++++++++++++++++++++++++++-------- test/jalview/io/FastaFileTest.java | 57 ++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 12 deletions(-) create mode 100644 test/jalview/io/FastaFileTest.java diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java index c698a31..a01e1db 100755 --- a/src/jalview/io/FastaFile.java +++ b/src/jalview/io/FastaFile.java @@ -22,6 +22,9 @@ package jalview.io; import java.io.IOException; +import com.stevesoft.pat.Regex; + +import jalview.analysis.Rna; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.Annotation; @@ -81,7 +84,8 @@ public class FastaFile extends AlignFile { super(seqs); } - + private static final Regex NOT_RNASS = new Regex( + "^[^<>[\\](){}ADFJ-RUVWYZadfj-ruvwyz]*$"); /** * DOCUMENT ME! 
* @@ -96,9 +100,10 @@ public class FastaFile extends AlignFile String line, uline; Sequence seq = null; + SequenceI aseqref = null; boolean annotation = false; - + boolean rnaAnnot=false; while ((uline = nextLine()) != null) { line = uline.trim(); @@ -110,28 +115,31 @@ public class FastaFile extends AlignFile { if (annotation) { - annotations.addElement(makeAnnotation(seq, sb)); + annotations.addElement(makeAnnotation(seq, sb,aseqref)); } } else { annotation = false; } - if (!firstLine) { seq.setSequence(sb.toString()); - if (!annotation) + if (!(annotation||rnaAnnot)) { seqs.addElement(seq); + } else { + AlignmentAnnotation alan = makeAnnotation(seq, sb, aseqref); + annotations.addElement(alan); } } seq = parseId(line.substring(1)); firstLine = false; - - sb = new StringBuffer(); + rnaAnnot=false; + aseqref=null; + sb.setLength(0); if (line.startsWith(">#_")) { @@ -140,14 +148,32 @@ public class FastaFile extends AlignFile } else { - sb.append(annotation ? uline : line); + // check if we have rna annotation for the preceding sequence data + if (!annotation) { + if (!firstLine && !NOT_RNASS.search(uline) && uline.indexOf('(')>-1 && uline.indexOf('('). + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.io; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.SequenceI; + +import java.io.IOException; + +import org.testng.annotations.Test; + +public class FastaFileTest +{ + @Test(groups = "Functional") + public void testParse_rnaFasta() throws IOException + { + String rnaseq = "gGGGGCCACAGCAGAAGCGUUCACGUCGCAGCCCCUGUCAGCCAUUGCACUCCGGCUGCGAAUUCUGCU", + rnastruct = "[[[[[[...((((((((((.......))).]]]]]]..(((((..........)))))....)))))))"; + //@formatter:off + String rna_fasta = ">strand_B\n" + + rnaseq+"\n" + + rnastruct+"\n"; + //@formatter:on + FastaFile cf = new FastaFile(rna_fasta, DataSourceType.PASTE); + SequenceI[] seqs = cf.getSeqsAsArray(); + AlignmentAnnotation[] aa = cf.annotations.toArray(new AlignmentAnnotation[1]); + assertEquals(seqs.length, 1); + assertEquals(seqs[0].getName(), "strand_B"); + assertEquals(seqs[0].getStart(), 1); + assertEquals(seqs[0].getEnd(), 69); + assertTrue(seqs[0].getSequenceAsString().endsWith("UUCUGCU")); + assertTrue(seqs[0].getAnnotation()!=null); + assertEquals(seqs[0].getAnnotation().length,1); + assertEquals(seqs[0].getAnnotation()[0].getRNAStruc(),rnastruct); + } +} -- 1.7.10.2