/*
 * Reconstructed from a gitweb commitdiff (new file, mode 100644):
 *   From:      Jim Procter
 *   Date:      Mon, 8 Jun 2020 10:08:25 +0000 (+0100)
 *   Subject:   JAL-3641 prototype FASTQ import via groovy script for 2.11.1
 *   X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=49492cdc61a03e377534696dc78ae76aae189adb;p=jalview.git
 *   Path:      examples/groovy/FastQParser.groovy (index 0000000..7658539)
 */
import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore
import htsjdk.samtools.fastq.FastqReader
import htsjdk.samtools.fastq.FastqRecord
import jalview.datamodel.AlignmentI
import jalview.datamodel.SequenceI
import jalview.datamodel.Sequence
import jalview.datamodel.SequenceFeature
import jalview.io.FileFormatI
import jalview.io.FileFormats
import jalview.io.FileParse
import jalview.io.AlignFile
import jalview.io.DataSourceType

/*
 * Prototype Jalview FASTQ importer based on HTS JDK 2.20
 */

/*
 * EDIT THE FILE PATH BELOW TO IMPORT A FASTQ FILE
 *
 * Another sample path used by the original author:
 *   /Users/jprocter/Downloads/small_normalised_illumina_adeno.fastq.gz
 */
FASTQ_FILE = "/Users/jprocter/Downloads/test.fastq"


/*
 * A parser class to read or write the format,
 * based on the fileFormat.groovy example for creating your own alignment parser/writer.
 *
 * Only reading is implemented: the writer constructor throws and print() returns null.
 */
class FastQFile extends AlignFile
{
  /*
   * When true, base qualities are attached as a (hidden) annotation row;
   * when false, one "Phred" sequence feature is created per base.
   * Annotations take far longer to create and manipulate in Jalview 2.11
   * (this is changing in 2.12!), so sequence features are created for now.
   */
  private static final boolean MAKE_ANNOTATION_ROWS = false

  /* The htsjdk reader wrapping this file's character stream; set by parse(). */
  FastqReader fqreader;

  /*
   * Always reports false.
   * NOTE(review): how the superclass uses this flag is not visible here - confirm
   * against jalview.io.AlignFile.
   */
  @Override
  protected boolean isParseImmediately()
  {
    return false;
  }

  /*
   * Constructor for reading a file.
   * NOTE(review): the original comment stated that the superclass constructor
   * calls parse(), yet isParseImmediately() returns false - confirm which
   * applies. The driver code at the bottom of this script reads the parsed
   * sequences straight after construction.
   */
  FastQFile(FileParse src)
  {
    super(src)
  }

  /*
   * Constructor for writing out an alignment - not implemented, always throws.
   */
  FastQFile(AlignmentI al)
  {
    throw new Error("Unimplemented");
  }

  /*
   * Parse a formatted data file (with no error checking!).
   *
   * Each FASTQ record becomes one Sequence named after the read; its base
   * qualities (when present) are attached as features or as an annotation row.
   * The reader is closed even if a record fails to parse.
   */
  void parse()
  {
    fqreader = new FastqReader(getReader());
    try
    {
      FastqRecord record
      while (fqreader.hasNext() && (record = fqreader.next()) != null)
      {
        // Create a sequence from read name and sequence string
        // (original author's note: start & end are 1-length)
        Sequence seq = new Sequence(record.getReadName(), record.getReadString());
        byte[] qualities = record.getBaseQualities();
        if (qualities != null)
        {
          if (MAKE_ANNOTATION_ROWS)
          {
            addQualityAnnotation(seq, record, qualities)
          }
          else
          {
            addQualityFeatures(seq, qualities)
          }
        }
        addSequence(seq)
      }
    }
    finally
    {
      // previously only reached on success, leaking the reader on any exception
      fqreader.close();
    }
  }

  /*
   * Adds one "Phred" feature per base; feature positions are 1-based, the
   * score is the raw quality byte converted to a Float.
   */
  private void addQualityFeatures(Sequence seq, byte[] qualities)
  {
    int p = 0;
    for (byte q : qualities)
    {
      SequenceFeature sf = new SequenceFeature("Phred", "Score", p + 1, p + 1, Float.valueOf(q), "");
      seq.addSequenceFeature(sf);
      p++;
    }
  }

  /*
   * Adds the qualities as a single "PhredScore" annotation row on the sequence
   * and registers it in this parser's annotations list. The row is created hidden.
   */
  private void addQualityAnnotation(Sequence seq, FastqRecord record, byte[] qualities)
  {
    jalview.datamodel.Annotation[] anns = new jalview.datamodel.Annotation[qualities.length];
    int p = 0;
    for (byte q : qualities)
    {
      anns[p++] = new jalview.datamodel.Annotation(Float.valueOf(q))
    }
    jalview.datamodel.AlignmentAnnotation qalAnnot = new jalview.datamodel.AlignmentAnnotation("PhredScore", "Base qualities: " + record.getBaseQualityHeader(), anns);
    qalAnnot.visible = false; // hidden by default
    seq.addAlignmentAnnotation(qalAnnot);
    annotations.add(qalAnnot);
  }

  /*
   * Print the formatted sequences
   * (addSuffix always defaults to true as no user preference for it).
   *
   * Writing is not supported: always returns null.
   */
  String print(SequenceI[] seqs, boolean addSuffix)
  {
    // not supported
    return null
  }
}

/*
 * Define the file format descriptor for handling FastQ.
 * Returns a map of closures coerced to FileFormatI; the format is declared
 * readable but not writable.
 */
def myFormat = { ->
  [
    getName: { -> 'FASTQ Parser (Groovy)' },

    // NOTE(review): getName() is resolved through the closure's owner, not
    // this map - confirm it works when toString() is actually invoked
    toString: { -> getName() },

    getExtensions: { -> 'fq,fastq' },

    getReader: { FileParse source -> new FastQFile(source) },

    getWriter: { AlignmentI al -> new FastQFile(al) },

    isReadable: { -> true },

    isWritable: { -> false },

    isTextFormat: { -> true },

    isStructureFile: { -> false },

    isComplexAlignFile: { -> false }

  ] as FileFormatI
}

// read in the file specified at the top of the Groovy script and show it
// in a new 800x600 alignment window on the Jalview desktop

FastQFile parser = new FastQFile(new FileParse(FASTQ_FILE, DataSourceType.FILE));
AlignmentI al = new jalview.datamodel.Alignment(parser.getSeqsAsArray());
parser.addAnnotations(al);
jalview.gui.AlignFrame newwindow = new jalview.gui.AlignFrame(al, 800, 600);
jalview.gui.Desktop.addInternalFrame(newwindow, parser.getDataName(), 800, 600);


/*
 * Register the file format. After running this script in Jalview's
 * Groovy console, the new format should be shown in open file.
 *
 * However, we don't yet have a way of dynamically updating the IdentifyFile routine,
 * so a FastQ file selected in the file browser won't be imported using this reader.
 * Registration is therefore left disabled:
 */

 // FileFormats.instance.registerFileFormat(myFormat())