X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=forester%2Fruby%2Fscripts%2Fbioruby_examples%2Fmsa_1.rb;h=d39d810515a6f654b9652c591a9103564d4f7719;hb=53694c29c8deada091a88bbd201f6bb2034124fa;hp=ed5d4733613ee407eb457083612283befa1bae32;hpb=981461fd0fe1a528ba2d8af024e7afb768da184a;p=jalview.git diff --git a/forester/ruby/scripts/bioruby_examples/msa_1.rb b/forester/ruby/scripts/bioruby_examples/msa_1.rb index ed5d473..d39d810 100644 --- a/forester/ruby/scripts/bioruby_examples/msa_1.rb +++ b/forester/ruby/scripts/bioruby_examples/msa_1.rb @@ -3,17 +3,59 @@ require 'bio' ############# +seqs = Bio::Alignment::MultiFastaFormat.new(File.open('bcl2.fasta').read) +seqs.entries.each do |seq| + puts seq.to_seq.output(:genbank) +end +puts +puts +puts :genbank +puts seqs.entries[0].to_seq.output(:genbank) +puts +puts :fasta +puts seqs.entries[0].to_seq.output(:fasta) +puts +puts :embl +puts seqs.entries[0].to_seq.output(:embl) +puts +puts :raw +puts seqs.entries[0].to_seq.output(:raw) +puts +puts :fasta_ncbi +puts seqs.entries[0].to_seq.output(:fasta_ncbi) +puts +puts :fastq +puts seqs.entries[0].to_seq.output(:fastq) +puts +puts :fastq_sanger +puts seqs.entries[0].to_seq.output(:fastq_sanger) +puts +puts :fastq_solexa +puts seqs.entries[0].to_seq.output(:fastq_solexa) +puts +puts :fastq_illumina +puts seqs.entries[0].to_seq.output(:fastq_illumina) +puts +puts :fasta_numeric +puts seqs.entries[0].to_seq.output(:fasta_numeric) +puts +puts :qual +puts seqs.entries[0].to_seq.output(:qual) +exit +############## + + # Reads in a ClustalW formatted multiple sequence alignment # from a file named "infile_clustalw.aln" and stores it in 'report'. report = Bio::ClustalW::Report.new(File.read('infile_clustalw.aln')) # Accesses the actual alignment. -align = report.alignment +msa = report.alignment -# Goes through all sequences in 'align' and prints the +# Goes through all sequences in 'msa' and prints the # actual molecular sequence. -align.each do |entry| - puts entry.seq +msa.each do |entry| + # puts entry.seq end ############## @@ -39,19 +81,33 @@ file.each do |entry| # do something on each fasta sequence entry end + ############## # Creates a new file named "outfile.fasta" and writes -# multiple sequence alignment 'align' to it in fasta format. +# multiple sequence alignment 'msa' to it in fasta format. File.open('outfile.fasta', 'w') do |f| - f.write(align.output(:fasta)) + f.write(msa.output(:fasta)) end -# Creates a new file named "outfile.aln" and writes -# multiple sequence alignment 'align' to it in clustal format. -File.open('outfile.aln', 'w') do |f| - f.write(align.output(:clustal)) +# Other formats +File.open('outfile.clustal', 'w') do |f| + f.write(msa.output(:clustal)) end +File.open('outfile.phylip', 'w') do |f| + f.write(msa.output(:phylip)) +end +File.open('outfile.phylipnon', 'w') do |f| + f.write(msa.output(:phylipnon)) +end +File.open('outfile.msf', 'w') do |f| + f.write(msa.output(:msf)) +end +File.open('outfile.molphy', 'w') do |f| + f.write(msa.output(:molphy)) +end + + ############# @@ -64,15 +120,15 @@ puts seq1.output(:fasta) -seqs = [ "MFQIPEFEPSEQEDSSSAER", - "MGTPKQPSLAPAHALGLRKS", - "PKQPSLAPAHALGLRKS", - "MCSTSGCDLE" ] +seqs = ['MFQIPEFEPSEQEDSSSAER', + 'MGTPKQPSLAPAHALGLRKS', + 'PKQPSLAPAHALGLRKS', + 'MCSTSGCDLE'] # MAFFT options = [ '--maxiterate', '1000', '--localpair' ] -mafft = Bio::MAFFT.new('/home/zma/SOFTWARE/mafft-6.847-without-extensions/scripts/mafft', options ) +mafft = Bio::MAFFT.new('mafft', options ) report = mafft.query_align( seqs) # Accesses the actual alignment