Edited wiki page PhyloBioRuby through web user interface.

[jalview.git] / forester / ruby / scripts / bioruby_examples / msa_1.rb
diff --git a/forester/ruby/scripts/bioruby_examples/msa_1.rb b/forester/ruby/scripts/bioruby_examples/msa_1.rb

index 9c977e5..ed5d473 100644 (file)
--- a/forester/ruby/scripts/bioruby_examples/msa_1.rb
+++ b/forester/ruby/scripts/bioruby_examples/msa_1.rb
@@ -1,24 +1,73 @@
  require 'rubygems'
  require 'bio'
   
-# creating a Bio::Sequence::NA object containing ambiguous alphabets
-#as = Bio::Sequence::NA.new("atgcyrwskmbdhvn")
+#############
  
-#print as.to_s
+# Reads a ClustalW-formatted multiple sequence alignment
+# from a file named "infile_clustalw.aln" and stores it in 'report'.
+report = Bio::ClustalW::Report.new(File.read('infile_clustalw.aln'))
  
-#print "\n"
+# Accesses the actual alignment.
+align = report.alignment
+
+# Iterates over all sequences in 'align' and prints the
+# actual molecular sequence of each one.
+align.each do |entry|
+  puts entry.seq
+end
+
+##############
+
+DEFAULT_PARSER = Bio::Alignment::MultiFastaFormat
+puts DEFAULT_PARSER.to_s
+
+#file = Bio::Alignment.readfiles('bcl2.fasta', Bio::Alignment::MultiFastaFormat)
+#file.each do |entry|
+#  puts entry.entry_id           # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'.
+#  puts entry.definition         # Gets the complete fasta description line.
+#  puts entry.seq                # Gets the actual sequence.
+#  puts entry.aaseq.composition  # Gets the amino acid composition.
+#end
+#puts 'OK'
+#puts
+
+file = Bio::FastaFormat.open('bcl2.fasta')
+file.each do |entry|
+   puts entry.entry_id           # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'.
+  puts entry.definition         # Gets the complete fasta description line.
+  puts entry.seq                # Gets the actual sequence.
+  # Do something with each FASTA sequence entry.
+end
  
+##############
  
-#seq1 = Bio::Sequence::AA.new("gggggg")
+# Creates a new file named "outfile.fasta" and writes the
+# multiple sequence alignment 'align' to it in FASTA format.
+File.open('outfile.fasta', 'w') do |f|
+  f.write(align.output(:fasta))
+end
+
+# Creates a new file named "outfile.aln" and writes the
+# multiple sequence alignment 'align' to it in Clustal format.
+File.open('outfile.aln', 'w') do |f|
+  f.write(align.output(:clustal))
+end
+
+#############
+
+seq1 = Bio::Sequence.auto("gggggg")
+
+
+puts seq1.output(:fasta)
  #seq2 = Bio::Sequence::AA.new("ggggt")
  #seq3 = Bio::Sequence::AA.new("ggt")
  
  
  
-seqs = [ "KMLFGVVFFFGG",
-         "LMGGHHF",
-         "GKKKKGHHHGHRRRGR",
-         "KKKKGHHHGHRRRGR" ] 
+seqs = [ "MFQIPEFEPSEQEDSSSAER",
+         "MGTPKQPSLAPAHALGLRKS",
+         "PKQPSLAPAHALGLRKS",
+         "MCSTSGCDLE" ] 
  
  
  # MAFFT
@@ -51,4 +100,31 @@ report = muscle.query_align( seqs)
  #puts report.alignment.output_fasta.to_s
  report.alignment.each { |x| puts x.to_s }
  puts 'OK'
-puts
\ No newline at end of file
+puts
+
+file = Bio::FastaFormat.open('bcl2.fasta')
+file.each do |entry|
+  puts entry.entry_id           # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'.
+  puts entry.definition         # Gets the complete fasta description line.
+  puts entry.seq                # Gets the actual sequence.
+  puts entry.aaseq.composition  # Gets the amino acid composition. 
+end
+puts 'OK'
+puts
+
+Bio::FlatFile.auto('bcl2.fasta') do |ff|
+  ff.each do |entry|
+    puts entry.entry_id           # Gets the identifier, e.g. 'sp|O35147|BAD_RAT'.
+    puts entry.definition         # Gets the complete fasta description line.
+    puts entry.seq                # Gets the actual sequence.
+    puts entry.aaseq.composition  # Gets the amino acid composition.
+  end
+end
+puts 'OK'
+puts
+
+
+
+
+
+