1 import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore
2 import htsjdk.samtools.fastq.FastqReader
3 import htsjdk.samtools.fastq.FastqRecord
4 import jalview.datamodel.AlignmentI
5 import jalview.datamodel.SequenceI
6 import jalview.datamodel.Sequence
7 import jalview.datamodel.SequenceFeature
8 import jalview.io.FileFormatI
9 import jalview.io.FileFormats
10 import jalview.io.FileParse
11 import jalview.io.AlignFile
12 import jalview.io.DataSourceType
15 * Prototype Jalview FASTQ importer based on HTS JDK 2.20
19 * EDIT THE FILE PATH BELOW TO IMPORT A FASTQ FILE
// Path of the FASTQ file to import; it is passed to FileParse further down in this script.
// NOTE(review): FASTQ_FILE is assigned twice in a row, so the first path is overwritten
// and only the second assignment (test.fastq) takes effect.
21 FASTQ_FILE = "/Users/jprocter/Downloads/small_normalised_illumina_adeno.fastq.gz"
22 FASTQ_FILE = "/Users/jprocter/Downloads/test.fastq"
26 * A parser class to read or write the format
27 * based on the fileFormat.groovy example for creating your own alignment parser/writer
// AlignFile subclass that reads FASTQ records through htsjdk's FastqReader.
// Writing is not implemented: the AlignmentI constructor below always throws.
29 class FastQFile extends AlignFile
// NOTE(review): the body of this method is not visible in this excerpt.
33 protected boolean isParseImmediately()
38 * Constructor for reading a file; the superclass
39 * constructor will call the parse() method
// src is the FileParse source to read from.
// NOTE(review): the constructor body is not visible in this excerpt.
41 FastQFile(FileParse src)
47 * Constructor for writing out an alignment
49 FastQFile(AlignmentI al)
// Always throws, so a FastQFile can never be constructed from an alignment.
// NOTE(review): java.lang.Error is normally reserved for unrecoverable JVM-level failures;
// UnsupportedOperationException would be the conventional choice - confirm no caller relies on Error.
51 throw new Error("Unimplemented");
55 * Parse a formatted data file (with no error checking!)
// Wrap this parser's reader (getReader()) in an htsjdk FastqReader.
// NOTE(review): no close() call on fqreader is visible in this excerpt - confirm it is released.
59 fqreader = new FastqReader(getReader());
// Iterate over the records; the loop also stops if next() returns null.
62 while (fqreader.hasNext() && (record = fqreader.next())!=null)
64 // Create a sequence from read name and sequence string. start & end are 1-length
65 Sequence seq = new Sequence(record.getReadName(), record.getReadString());
// One byte per base, as returned by FastqRecord.getBaseQualities().
66 byte[] qualities = record.getBaseQualities();
70 // Two options here: create an annotation row, or create features.
71 // annotations take far longer to create and manipulate in Jalview 2.11 (this is changing in 2.12!)
72 // so we create sequence features for now
// Hard-coded switch between the two options described above.
73 boolean makeAnn=false;
// Annotation array with one slot per quality value.
75 jalview.datamodel.Annotation[] anns=new jalview.datamodel.Annotation[qualities.length];
77 for (byte q:qualities)
// Annotation option: store the quality as a Float-valued Annotation at index p, then advance p.
// NOTE(review): the declaration of p is not visible in this excerpt.
79 if (makeAnn) { anns[p++]=new jalview.datamodel.Annotation(Float.valueOf(q)) }
// Feature option: a "Phred"/"Score" feature covering the single position p+1, with the quality as its score.
// NOTE(review): the arguments appear to be (type, description, begin, end, score, group) - confirm against
// the SequenceFeature constructor. How p advances on this path is not visible here.
81 SequenceFeature sf = new SequenceFeature("Phred","Score", p+1,p+1,Float.valueOf(q),"");
82 seq.addSequenceFeature(sf);
// Build a "PhredScore" annotation row from anns; its description includes the record's base quality header.
// NOTE(review): presumably guarded by makeAnn in lines not visible here - confirm.
87 jalview.datamodel.AlignmentAnnotation qalAnnot = new jalview.datamodel.AlignmentAnnotation("PhredScore","Base qualities: "+record.getBaseQualityHeader(),anns);
88 qalAnnot.visible=false; // hidden by default
// Attach the row to the sequence and add it to the annotations collection.
89 seq.addAlignmentAnnotation(qalAnnot);
90 annotations.add(qalAnnot);
99 * Print the formatted sequences
100 * (addSuffix always defaults to true as no user preference for it)
// NOTE(review): the body of this method is not visible in this excerpt.
102 String print(SequenceI[] seqs, boolean addSuffix)
109 * Define and register the fileformat class for handling FastQ
// Map entries whose closures supply the file-format methods by name.
// NOTE(review): the enclosing definition is not visible in this excerpt; presumably the map is
// coerced to FileFormatI (imported above) - confirm.
// Name of the format; toString delegates to it.
113 getName: { -> 'FASTQ Parser (Groovy)' },
115 toString: { -> getName() },
// File extensions as a single comma-separated string.
117 getExtensions: { 'fq,fastq'},
// Reading is delegated to the FastQFile(FileParse) constructor.
119 getReader: { FileParse source -> new FastQFile(source) },
// NOTE(review): isWritable below returns false and the FastQFile(AlignmentI) constructor always throws,
// so this writer factory cannot produce a usable writer.
121 getWriter: { AlignmentI al -> new FastQFile(al) },
123 isReadable: { -> true },
125 isWritable: { -> false },
127 isTextFormat: { -> true },
129 isStructureFile: { -> false },
131 isComplexAlignFile: { -> false },
136 // read in the file specified at the top of the Groovy script
// Construct the parser on FASTQ_FILE, opened with DataSourceType.FILE.
138 FastQFile parser = new FastQFile(new FileParse(FASTQ_FILE, DataSourceType.FILE));
// Build an alignment from the parsed sequences, then let the parser add its annotations to it.
139 AlignmentI al = new jalview.datamodel.Alignment(parser.getSeqsAsArray());
140 parser.addAnnotations(al);
// Create an 800x600 alignment window and add it to the Jalview desktop,
// passing parser.getDataName() (presumably used as the window title - confirm).
141 jalview.gui.AlignFrame newwindow = new jalview.gui.AlignFrame(al,800,600);
142 jalview.gui.Desktop.addInternalFrame(newwindow, parser.getDataName(), 800,600);
146 * Register the file format. Once the registration line below is uncommented and this script
147 * is run in Jalview's Groovy console, the new format should be offered when opening a file.
149 * However, we don't yet have a way of dynamically updating the IdentifyFile routine,
150 * so a FastQ file selected in the file browser won't be imported using this reader
154 // FileFormats.instance.registerFileFormat(myFormat())