X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fruby%2Fscripts%2Fbioruby_examples%2Fmsa_1.rb;h=d39d810515a6f654b9652c591a9103564d4f7719;hb=53694c29c8deada091a88bbd201f6bb2034124fa;hp=9c977e5e1a805f4cea92577dd000058da5ee2ff6;hpb=6639ef7ab715a18637ce7300707fa48bf77c0de6;p=jalview.git diff --git a/forester/ruby/scripts/bioruby_examples/msa_1.rb b/forester/ruby/scripts/bioruby_examples/msa_1.rb index 9c977e5..d39d810 100644 --- a/forester/ruby/scripts/bioruby_examples/msa_1.rb +++ b/forester/ruby/scripts/bioruby_examples/msa_1.rb @@ -1,29 +1,134 @@ require 'rubygems' require 'bio' -# creating a Bio::Sequence::NA object containing ambiguous alphabets -#as = Bio::Sequence::NA.new("atgcyrwskmbdhvn") +############# -#print as.to_s +seqs = Bio::Alignment::MultiFastaFormat.new(File.open('bcl2.fasta').read) +seqs.entries.each do |seq| + puts seq.to_seq.output(:genbank) +end +puts +puts +puts :genbank +puts seqs.entries[0].to_seq.output(:genbank) +puts +puts :fasta +puts seqs.entries[0].to_seq.output(:fasta) +puts +puts :embl +puts seqs.entries[0].to_seq.output(:embl) +puts +puts :raw +puts seqs.entries[0].to_seq.output(:raw) +puts +puts :fasta_ncbi +puts seqs.entries[0].to_seq.output(:fasta_ncbi) +puts +puts :fastq +puts seqs.entries[0].to_seq.output(:fastq) +puts +puts :fastq_sanger +puts seqs.entries[0].to_seq.output(:fastq_sanger) +puts +puts :fastq_solexa +puts seqs.entries[0].to_seq.output(:fastq_solexa) +puts +puts :fastq_illumina +puts seqs.entries[0].to_seq.output(:fastq_illumina) +puts +puts :fasta_numeric +puts seqs.entries[0].to_seq.output(:fasta_numeric) +puts +puts :qual +puts seqs.entries[0].to_seq.output(:qual) +exit +############## + + +# Reads in a ClustalW formatted multiple sequence alignment +# from a file named "infile_clustalw.aln" and stores it in 'report'. +report = Bio::ClustalW::Report.new(File.read('infile_clustalw.aln')) + +# Accesses the actual alignment. +msa = report.alignment + +# Goes through all sequences in 'msa' and prints the +# actual molecular sequence. +msa.each do |entry| + # puts entry.seq +end + +############## + +DEFAULT_PARSER = Bio::Alignment::MultiFastaFormat +puts DEFAULT_PARSER.to_s + +#file = Bio::Alignment.readfiles('bcl2.fasta', Bio::Alignment::MultiFastaFormat) +#file.each do |entry| +# puts entry.entry_id # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'. +# puts entry.definition # Gets the complete fasta description line. +# puts entry.seq # Gets the actual sequence. + #puts entry.aaseq.composition # Gets the amino acid composition. +#end +#puts 'OK' +#puts + +file = Bio::FastaFormat.open('bcl2.fasta') +file.each do |entry| + puts entry.entry_id # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'. + puts entry.definition # Gets the complete fasta description line. + puts entry.seq # Gets the actual sequence. + # do something on each fasta sequence entry +end -#print "\n" +############## -#seq1 = Bio::Sequence::AA.new("gggggg") +# Creates a new file named "outfile.fasta" and writes +# multiple sequence alignment 'msa' to it in fasta format. +File.open('outfile.fasta', 'w') do |f| + f.write(msa.output(:fasta)) +end + +# Other formats +File.open('outfile.clustal', 'w') do |f| + f.write(msa.output(:clustal)) +end +File.open('outfile.phylip', 'w') do |f| + f.write(msa.output(:phylip)) +end +File.open('outfile.phylipnon', 'w') do |f| + f.write(msa.output(:phylipnon)) +end +File.open('outfile.msf', 'w') do |f| + f.write(msa.output(:msf)) +end +File.open('outfile.molphy', 'w') do |f| + f.write(msa.output(:molphy)) +end + + + +############# + +seq1 = Bio::Sequence.auto("gggggg") + + +puts seq1.output(:fasta) #seq2 = Bio::Sequence::AA.new("ggggt") #seq3 = Bio::Sequence::AA.new("ggt") -seqs = [ "KMLFGVVFFFGG", - "LMGGHHF", - "GKKKKGHHHGHRRRGR", - "KKKKGHHHGHRRRGR" ] +seqs = ['MFQIPEFEPSEQEDSSSAER', + 'MGTPKQPSLAPAHALGLRKS', + 'PKQPSLAPAHALGLRKS', + 'MCSTSGCDLE'] # MAFFT options = [ '--maxiterate', '1000', '--localpair' ] -mafft = Bio::MAFFT.new('/home/zma/SOFTWARE/mafft-6.847-without-extensions/scripts/mafft', options ) +mafft = Bio::MAFFT.new('mafft', options ) report = mafft.query_align( seqs) # Accesses the actual alignment @@ -51,4 +156,31 @@ report = muscle.query_align( seqs) #puts report.alignment.output_fasta.to_s report.alignment.each { |x| puts x.to_s } puts 'OK' -puts \ No newline at end of file +puts + +file = Bio::FastaFormat.open('bcl2.fasta') +file.each do |entry| + puts entry.entry_id # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'. + puts entry.definition # Gets the complete fasta description line. + puts entry.seq # Gets the actual sequence. + puts entry.aaseq.composition # Gets the amino acid composition. +end +puts 'OK' +puts + +Bio::FlatFile.auto('bcl2.fasta') do |ff| + ff.each do |entry| + puts entry.entry_id # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'. + puts entry.definition # Gets the complete fasta description line. + puts entry.seq # Gets the actual sequence. + puts entry.aaseq.composition # Gets the amino acid composition. + end +end +puts 'OK' +puts + + + + + +