X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fscripts%2Fbioruby_examples%2Fmsa_1.rb;h=92a4acf8d2d185d91a62b0ceca5c90a3bbb76760;hb=d49bf58cee106ada674e7fa0d7fcf328ed797252;hp=bb8210bf53b23c41a6e6da41e7a208a795b6d1c4;hpb=0b1f98e56d39ab1acd58da0f57fda383696d92c9;p=jalview.git diff --git a/forester/ruby/scripts/bioruby_examples/msa_1.rb b/forester/ruby/scripts/bioruby_examples/msa_1.rb index bb8210b..92a4acf 100644 --- a/forester/ruby/scripts/bioruby_examples/msa_1.rb +++ b/forester/ruby/scripts/bioruby_examples/msa_1.rb @@ -1,8 +1,146 @@ -require 'rubygems' require 'bio' - + +seq_ary = Array.new +ff = Bio::FlatFile.auto('bcl2.fasta') +ff.each_entry do |entry| + seq_ary.push(entry) + puts entry.entry_id # prints the identifier of the entry + puts entry.definition # prints the definition of the entry + puts entry.seq # prints the sequence data of the entry +end + +# Creates a multiple sequence alignment (possibly unaligned) named +# 'seqs' from array 'seq_ary'. +seqs = Bio::Alignment.new( seq_ary ) + + +seqs.each { |seq| puts seq.to_s } + + +puts seqs.consensus + +# Writes multiple sequence alignment (possibly unaligned) 'seqs' +# to a file in PHYLIP format. +File.open('out0.phylip', 'w') do |f| + f.write(seqs.output(:phylip)) +end + +File.open('out0.fasta', 'w') do |f| + f.write(seqs.output(:fasta)) +end + +exit +############# + +# Reads in a FASTA-formatted multiple sequence alignment (which does +# not have to be aligned, though) and stores its sequences in +# array 'seq_ary'. +seq_ary = Array.new +fasta_seqs = Bio::Alignment::MultiFastaFormat.new(File.open('bcl2.fasta').read) +fasta_seqs.entries.each do |seq| + seq_ary.push( seq ) +end + +# Creates a multiple sequence alignment (possibly unaligned) named +# 'seqs' from array 'seq_ary'. +seqs = Bio::Alignment.new( seq_ary ) +seqs.each { |seq| puts seq.to_s } + + +puts seqs.consensus + +# Writes multiple sequence alignment (possibly unaligned) 'seqs' +# to a file in PHYLIP format. +File.open('out1.phylip', 'w') do |f| + f.write(seqs.output(:phylip)) +end + +File.open('out1.fasta', 'w') do |f| + f.write(seqs.output(:fasta)) +end + +exit +################# + +#ff = Bio::FlatFile.new(Bio::FastaFormat, 'bcl2.fasta') +#ff.each_entry do |f| +# puts "definition : " + f.definition +# puts "nalen : " + f.nalen.to_s +# puts "naseq : " + f.naseq +#end +#exit + +seq_ary = Array.new +Bio::FastaFormat.open('bcl2.fasta') do | file | + file.each do |entry| + puts entry.entry_id # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'. + # puts entry.definition # Gets the complete fasta description line. + #puts entry.seq # Gets the actual sequence. + seq_ary.push( entry ) + end +end +seqs =Bio::Alignment.new( seq_ary ) +seqs.each { |x| puts x } +puts seqs.consensus +exit + + + +#will be obsolete! +#seqs =Bio::Alignment.readfiles(File.open('bcl2.fasta')) +#seqs.entries.each do |seq| +# puts seq.to_biosequence +#end + +#Bio::Alignment. + + + +#exit ############# +seqs = Bio::Alignment::MultiFastaFormat.new(File.open('bcl2.fasta').read) +seqs.entries.each do |seq| + puts seq.to_seq.output(:genbank) +end +puts +puts +puts :genbank +puts seqs.entries[0].to_seq.output(:genbank) +puts +puts :fasta +puts seqs.entries[0].to_seq.output(:fasta) +puts +puts :embl +puts seqs.entries[0].to_seq.output(:embl) +puts +puts :raw +puts seqs.entries[0].to_seq.output(:raw) +puts +puts :fasta_ncbi +puts seqs.entries[0].to_seq.output(:fasta_ncbi) +puts +puts :fastq +puts seqs.entries[0].to_seq.output(:fastq) +puts +puts :fastq_sanger +puts seqs.entries[0].to_seq.output(:fastq_sanger) +puts +puts :fastq_solexa +puts seqs.entries[0].to_seq.output(:fastq_solexa) +puts +puts :fastq_illumina +puts seqs.entries[0].to_seq.output(:fastq_illumina) +puts +puts :fasta_numeric +puts seqs.entries[0].to_seq.output(:fasta_numeric) +puts +puts :qual +puts seqs.entries[0].to_seq.output(:qual) +#exit +############## + + # Reads in a ClustalW formatted multiple sequence alignment # from a file named "infile_clustalw.aln" and stores it in 'report'. report = Bio::ClustalW::Report.new(File.read('infile_clustalw.aln')) @@ -13,7 +151,7 @@ msa = report.alignment # Goes through all sequences in 'msa' and prints the # actual molecular sequence. msa.each do |entry| - puts entry.seq + # puts entry.seq end ############## @@ -39,6 +177,7 @@ file.each do |entry| # do something on each fasta sequence entry end + ############## # Creates a new file named "outfile.fasta" and writes