import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore
import htsjdk.samtools.fastq.FastqReader
import htsjdk.samtools.fastq.FastqRecord
import jalview.datamodel.AlignmentI
import jalview.datamodel.SequenceI
import jalview.datamodel.Sequence
import jalview.datamodel.SequenceFeature
import jalview.io.FileFormatI
import jalview.io.FileFormats
import jalview.io.FileParse
import jalview.io.AlignFile
import jalview.io.DataSourceType

/*
 * Prototype Jalview FASTQ importer based on HTS JDK 2.20
 */

/*
 * EDIT THE FILE PATH BELOW TO IMPORT A FASTQ FILE
 */
FASTQ_FILE = "/Users/jprocter/Downloads/test.fastq"
FASTQ_FILE_PROTOCOL = DataSourceType.FILE // could also be DataSourceType.URL

/*
 * A parser class to read the FASTQ format, based on the fileFormat.groovy
 * example for creating your own alignment parser/writer.
 * Writing is not implemented.
 */
class FastQFile extends AlignFile
{
  /* HTSJDK reader wrapped around this parser's data source; set by parse() */
  FastqReader fqreader;

  /*
   * Always reports false.
   * NOTE(review): how the AlignFile superclass uses this flag is not visible
   * in this script - confirm against the Jalview version in use.
   */
  @Override
  protected boolean isParseImmediately()
  {
    return false;
  }

  /*
   * Constructor for reading a file; the superclass
   * constructor will call the parse() method
   */
  FastQFile(FileParse src)
  {
    super(src)
  }

  /*
   * Constructor for writing out an alignment.
   * Writing is not supported, so this always throws an Error.
   */
  FastQFile(AlignmentI al)
  {
    throw new Error("Unimplemented");
  }

  /*
   * Parse a formatted data file (with no error checking!)
   *
   * Each FASTQ record becomes one sequence. When the record carries base
   * qualities, each quality value is attached to the sequence as a
   * single-position "Phred" sequence feature whose score is the quality.
   * The reader is closed when parsing finishes, whether or not it succeeded.
   */
  void parse()
  {
    fqreader = new FastqReader(getReader());
    try
    {
      FastqRecord record
      while (fqreader.hasNext() && (record = fqreader.next()) != null)
      {
        // Create a sequence from read name and sequence string.
        // start & end are 1-length
        Sequence seq = new Sequence(record.getReadName(), record.getReadString());
        byte[] qualities = record.getBaseQualities();
        if (qualities != null)
        {
          // Two options here: create an annotation row, or create features.
          // annotations take far longer to create and manipulate in Jalview 2.11
          // (this is changing in 2.12!) so we create sequence features for now
          boolean makeAnn = false;
          if (makeAnn)
          {
            // the annotation array is only needed (and only allocated) on this path
            jalview.datamodel.Annotation[] anns = new jalview.datamodel.Annotation[qualities.length];
            int p = 0;
            for (byte q : qualities)
            {
              anns[p++] = new jalview.datamodel.Annotation(Float.valueOf(q))
            }
            jalview.datamodel.AlignmentAnnotation qalAnnot = new jalview.datamodel.AlignmentAnnotation(
                    "PhredScore", "Base qualities: " + record.getBaseQualityHeader(), anns);
            qalAnnot.visible = false; // hidden by default
            seq.addAlignmentAnnotation(qalAnnot);
            annotations.add(qalAnnot);
          }
          else
          {
            // feature positions are 1-based, so base i of the read is position i+1
            int p = 0;
            for (byte q : qualities)
            {
              SequenceFeature sf = new SequenceFeature("Phred", "Score", p + 1, p + 1, Float.valueOf(q), "");
              seq.addSequenceFeature(sf);
              p++;
            }
          }
        }
        addSequence(seq)
      }
    }
    finally
    {
      // release the underlying reader even if a record failed to parse
      fqreader.close();
    }
  }

  /*
   * Print the formatted sequences
   * (addSuffix always defaults to true as no user preference for it)
   *
   * Not supported: always returns null.
   */
  String print(SequenceI[] seqs, boolean addSuffix)
  {
    // not supported
    return null
  }
}

/*
 * Define and register the fileformat class for handling FastQ
 */
def myFormat = { ->
  [
    getName: { -> 'FASTQ Parser (Groovy)' },

    toString: { -> getName() },

    getExtensions: { 'fq,fastq'},

    getReader: { FileParse source -> new FastQFile(source) },

    getWriter: { AlignmentI al -> new FastQFile(al) },

    isReadable: { -> true },

    isWritable: { -> false },

    isTextFormat: { -> true },

    isStructureFile: { -> false },

    isComplexAlignFile: { -> false },

  ] as FileFormatI
}

// read in the file specified at the top of the Groovy script
FastQFile parser = new FastQFile(new FileParse(FASTQ_FILE, FASTQ_FILE_PROTOCOL));

// build an alignment from the parsed reads and show it in a new window
AlignmentI al = new jalview.datamodel.Alignment(parser.getSeqsAsArray());
parser.addAnnotations(al);
jalview.gui.AlignFrame newwindow = new jalview.gui.AlignFrame(al, 800, 600);
jalview.gui.Desktop.addInternalFrame(newwindow, parser.getDataName(), 800, 600);

/*
 * Register the file format. After running this script in Jalview's
 * Groovy console, the new format should be shown in open file.
 *
 * However, we don't yet have a way of dynamically updating the IdentifyFile routine,
 * so a FastQ file selected in the file browser won't be imported using this reader
 *
 */
// FileFormats.instance.registerFileFormat(myFormat())