Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
Adam Wiggins committed Jun 11, 2008
0 parents commit aa4dec4
Show file tree
Hide file tree
Showing 10 changed files with 463 additions and 0 deletions.
33 changes: 33 additions & 0 deletions README
@@ -0,0 +1,33 @@
= YamlDb

YamlDb is a database-independent format for dumping and restoring data. It complements the database-independent schema format found in db/schema.rb. The data is saved into db/data.yml.

This can be used as a replacement for mysqldump or pg_dump, but only for the databases typically used by Rails apps. Users, permissions, schemas, triggers, and other advanced database features are not supported - by design.

Any database that has an ActiveRecord adapter should work.

== Usage

rake db:data:dump -> Dump contents of Rails database to db/data.yml
rake db:data:load -> Load contents of db/data.yml into the database

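Further, there are tasks db:dump and db:load which do the entire database, schema and data together (db:dump is the equivalent of running db:schema:dump followed by db:data:dump; db:load is the equivalent of running db:schema:load followed by db:data:load).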

== Examples

One common use would be to switch your data from one database backend to another. For example, let's say you wanted to switch from SQLite to MySQL. You might execute the following steps:

1. rake db:dump

2. Edit config/database.yml and change your adapter to mysql, set up database params

3. mysqladmin create [database name]

4. rake db:load

== Credits

Created by Orion Henry and Adam Wiggins. Major updates by Ricardo Chimal, Jr. Patches contributed by Michael Irwin.

Send questions, feedback, or patches to the Heroku mailing list: http://groups.google.com/group/heroku

10 changes: 10 additions & 0 deletions Rakefile
@@ -0,0 +1,10 @@
require 'rake'
require 'spec/rake/spectask'

# Defines the `spec` task, which runs every *_spec.rb file directly under spec/.
desc "Run all specs"
Spec::Rake::SpecTask.new('spec') { |t| t.spec_files = FileList['spec/*_spec.rb'] }

# A bare `rake` runs the specs.
task :default => [ :spec ]

5 changes: 5 additions & 0 deletions about.yml
@@ -0,0 +1,5 @@
author: Orion Henry and Adam Wiggins of Heroku
summary: Dumps and loads a database-independent data dump format in db/data.yml.
homepage: http://opensource.heroku.com/
license: MIT
rails_version: 1.2+
1 change: 1 addition & 0 deletions init.rb
@@ -0,0 +1 @@
require 'yaml_db'
168 changes: 168 additions & 0 deletions lib/yaml_db.rb
@@ -0,0 +1,168 @@
require 'rubygems'
require 'yaml'
require 'active_record'


# Entry points for dumping a database to a YAML file and loading it back.
#
# ActiveRecord logging is switched off while a dump or load runs and is
# restored afterwards, including when the operation raises.
module YamlDb
  # Dumps the database to a YAML file.
  #
  # filename - path of the file to create or overwrite.
  #
  # The file handle is closed and the logger restored even if the dump fails.
  def self.dump(filename)
    disable_logger
    begin
      File.open(filename, "w") { |io| YamlDb::Dump.dump(io) }
    ensure
      reenable_logger
    end
  end

  # Loads a YAML dump file into the database.
  #
  # filename - path of the dump file to read.
  #
  # The file handle is closed and the logger restored even if the load fails.
  def self.load(filename)
    disable_logger
    begin
      File.open(filename, "r") { |io| YamlDb::Load.load(io) }
    ensure
      reenable_logger
    end
  end

  # Remembers the current ActiveRecord logger and turns logging off.
  def self.disable_logger
    @@old_logger = ActiveRecord::Base.logger
    ActiveRecord::Base.logger = nil
  end

  # Puts back the logger saved by disable_logger.
  def self.reenable_logger
    ActiveRecord::Base.logger = @@old_logger
  end
end


module YamlDb
  # Helpers that shape query results into the rows written to a dump.
  module Utils
    # Renders one page of rows as the YAML fragment that sits under a table's
    # "records:" key.
    #
    # records - Array of row Arrays.
    #
    # Returns a String with each row as a two-space-indented sequence entry.
    def self.chunk_records(records)
      yaml = [ records ].to_yaml
      # Drop the document header; emitters differ on the trailing space.
      yaml.sub!(/\A--- ?\n/, "")
      # Blank out the wrapper array's "- " marker while keeping its width, so
      # the first row lines up with the rows that follow it.
      yaml.sub!(/\A- - -/, '  - -')
      yaml
    end

    # Returns the values of +hash+ for +keys+, in the order of +keys+.
    def self.unhash(hash, keys)
      keys.map { |key| hash[key] }
    end

    # Replaces every Hash in +records+ with its values ordered by +keys+.
    #
    # The array is modified in place and also returned.
    def self.unhash_records(records, keys)
      records.map! { |record| unhash(record, keys) }
    end

    # Converts adapter-specific boolean encodings to true/false in place.
    #
    # records - Array of row Hashes.
    # columns - names of the boolean columns to convert.
    #
    # 't', '1' and 1 become true and any other non-nil value becomes false.
    # nil is left alone so that NULL stays NULL in the dump.
    #
    # Returns +records+.
    def self.convert_booleans(records, columns)
      records.each do |record|
        columns.each do |column|
          value = record[column]
          next if value.nil? || is_boolean(value)
          record[column] = [ 't', '1', 1 ].include?(value)
        end
      end
      records
    end

    # Returns the names of the columns of +table+ whose type is :boolean.
    def self.boolean_columns(table)
      columns = ActiveRecord::Base.connection.columns(table).select { |c| c.type == :boolean }
      columns.map { |c| c.name }
    end

    # Returns true when +value+ is already true or false.
    def self.is_boolean(value)
      value.kind_of?(TrueClass) || value.kind_of?(FalseClass)
    end
  end
end


module YamlDb
  # Writes the tables of the current ActiveRecord connection to an IO as a
  # stream of YAML documents, one document per non-empty table.
  module Dump
    # Dumps every table reported by the connection to +io+.
    def self.dump(io)
      ActiveRecord::Base.connection.tables.each do |table|
        dump_table(io, table)
      end
    end

    # Dumps the column list and rows of +table+ to +io+.
    # Tables without rows are skipped entirely.
    def self.dump_table(io, table)
      return if table_record_count(table).zero?

      dump_table_columns(io, table)
      dump_table_records(io, table)
    end

    # Starts a new YAML document for +table+ and writes its column names.
    def self.dump_table_columns(io, table)
      io.write("\n")
      io.write({ table => { 'columns' => table_column_names(table) } }.to_yaml)
    end

    # Writes the "records:" section of +table+ to +io+, one page at a time.
    def self.dump_table_records(io, table)
      table_record_header(io)

      column_names = table_column_names(table)

      each_table_page(table) do |records|
        rows = YamlDb::Utils.unhash_records(records, column_names)
        io.write(YamlDb::Utils.chunk_records(rows))
      end
    end

    # Writes the "records:" key. It is indented two spaces so that it sits
    # beside the "columns:" key that to_yaml nests under the table name.
    def self.table_record_header(io)
      io.write("  records: \n")
    end

    # Returns the column names of +table+ in the order the connection reports them.
    def self.table_column_names(table)
      ActiveRecord::Base.connection.columns(table).map { |c| c.name }
    end

    # Yields the rows of +table+ in pages, ordered by the table's first column.
    #
    # table            - name of the table to read.
    # records_per_page - maximum number of rows per yielded page.
    #
    # Each page is the Array of row Hashes returned by select_all, with the
    # table's boolean columns converted by YamlDb::Utils.convert_booleans.
    def self.each_table_page(table, records_per_page=1000)
      total_count = table_record_count(table)
      # Index of the last page; -1 (no iterations) when the table is empty.
      pages = (total_count.to_f / records_per_page).ceil - 1
      id = quote_column(table_column_names(table).first)
      quoted_table = quote_table(table)
      boolean_columns = YamlDb::Utils.boolean_columns(table)

      (0..pages).each do |page|
        sql_limit = "LIMIT #{records_per_page} OFFSET #{records_per_page*page}"
        records = ActiveRecord::Base.connection.select_all("SELECT * FROM #{quoted_table} ORDER BY #{id} #{sql_limit}")
        records = YamlDb::Utils.convert_booleans(records, boolean_columns)
        yield records
      end
    end

    # Returns the number of rows in +table+ as an Integer.
    def self.table_record_count(table)
      ActiveRecord::Base.connection.select_one("SELECT COUNT(*) FROM #{quote_table(table)}").values.first.to_i
    end

    # Returns +table+ quoted for use in SQL, so that table names which are
    # reserved words still work.
    def self.quote_table(table)
      quote_with(:quote_table_name, table)
    end

    # Returns +column+ quoted for use in SQL.
    def self.quote_column(column)
      quote_with(:quote_column_name, column)
    end

    # Applies the connection's +quoter+ method to +name+. Connections that do
    # not provide the method get the bare name back.
    def self.quote_with(quoter, name)
      connection = ActiveRecord::Base.connection
      connection.respond_to?(quoter) ? connection.send(quoter, name) : name
    end
  end
end


module YamlDb
  # Restores tables from a YAML dump: each document maps a table name to its
  # 'columns' and 'records'.
  module Load
    # Loads every table found in +io+ inside a single transaction.
    #
    # Documents that are empty and tables whose entry is nil are skipped.
    #
    # NOTE(review): documents are deserialized by the YAML library's stream
    # loader, which is presumably not restricted to plain data types - only
    # load dump files you trust.
    def self.load(io)
      ActiveRecord::Base.connection.transaction do
        each_document(io) do |ydoc|
          next unless ydoc.respond_to?(:each_pair)

          ydoc.each_pair do |table_name, data|
            next if data.nil?
            load_table(table_name, data)
          end
        end
      end
    end

    # Yields each YAML document read from +io+. Uses YAML.load_documents where
    # the YAML library provides it and YAML.load_stream otherwise.
    def self.each_document(io, &block)
      if YAML.respond_to?(:load_documents)
        YAML.load_documents(io, &block)
      else
        YAML.load_stream(io, &block)
      end
    end

    # Empties +table+, falling back to DELETE when TRUNCATE fails.
    #
    # Only StandardError triggers the fallback, so interrupts and exits still
    # propagate.
    def self.truncate_table(table)
      quoted_table = quote_table(table)
      begin
        ActiveRecord::Base.connection.execute("TRUNCATE #{quoted_table}")
      rescue StandardError
        ActiveRecord::Base.connection.execute("DELETE FROM #{quoted_table}")
      end
    end

    # Replaces the contents of +table+ with the rows in +data+.
    #
    # data - Hash with 'columns' (Array of column names) and 'records'
    #        (Array of row Arrays, values in column order).
    def self.load_table(table, data)
      column_names = data['columns']
      truncate_table(table)
      load_records(table, column_names, data['records'])
      reset_pk_sequence!(table)
    end

    # Inserts +records+ into +table+, one INSERT per row.
    # A nil +records+ (a table dumped without rows) inserts nothing.
    def self.load_records(table, column_names, records)
      return if records.nil?

      connection = ActiveRecord::Base.connection
      quoted_table = quote_table(table)
      quoted_column_names = column_names.map { |column| connection.quote_column_name(column) }.join(',')
      records.each do |record|
        quoted_values = record.map { |value| connection.quote(value) }.join(',')
        connection.execute("INSERT INTO #{quoted_table} (#{quoted_column_names}) VALUES (#{quoted_values})")
      end
    end

    # Resets the primary key sequence of +table_name+ on connections that
    # support it. Checking for the method avoids referencing an adapter class
    # that may not be loaded.
    def self.reset_pk_sequence!(table_name)
      connection = ActiveRecord::Base.connection
      connection.reset_pk_sequence!(table_name) if connection.respond_to?(:reset_pk_sequence!)
    end

    # Returns +table+ quoted for use in SQL. Connections that do not provide
    # quote_table_name get the bare name back.
    def self.quote_table(table)
      connection = ActiveRecord::Base.connection
      connection.respond_to?(:quote_table_name) ? connection.quote_table_name(table) : table
    end
  end
end
7 changes: 7 additions & 0 deletions spec/base.rb
@@ -0,0 +1,7 @@
# Shared bootstrap for the specs: loads RSpec and the library under test.
require 'rubygems'
require 'spec'

# Put this checkout's lib/ first on the load path so that
# require 'yaml_db' resolves to it.
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
require 'yaml_db'


89 changes: 89 additions & 0 deletions spec/yaml_dump_spec.rb
@@ -0,0 +1,89 @@
require File.dirname(__FILE__) + '/base'

# Specs for YamlDb::Dump, run against a mocked ActiveRecord connection
# instead of a real database.
#
# NOTE(review): the expected strings and heredocs below are
# whitespace-sensitive - confirm their indentation matches what
# YamlDb::Dump actually writes.
describe YamlDb::Dump do
# Replaces ActiveRecord::Base with a mock and stubs its connection to report
# one table, 'mytable', with columns 'a' and 'b', a count of 2, and two rows.
before do
File.stub!(:new).with('dump.yml', 'w').and_return(StringIO.new)

ActiveRecord::Base = mock('ActiveRecord::Base', :null_object => true)
ActiveRecord::Base.connection = mock('connection')
ActiveRecord::Base.connection.stub!(:tables).and_return([ 'mytable' ])
ActiveRecord::Base.connection.stub!(:columns).with('mytable').and_return([ mock('a',:name => 'a'), mock('b', :name => 'b') ])
ActiveRecord::Base.connection.stub!(:select_one).and_return({"count"=>"2"})
ActiveRecord::Base.connection.stub!(:select_all).and_return([ { 'a' => 1, 'b' => 2 }, { 'a' => 3, 'b' => 4 } ])
end

# Every example writes into its own in-memory IO.
before(:each) do
@io = StringIO.new
end

it "should return a formatted string" do
YamlDb::Dump.table_record_header(@io)
@io.rewind
@io.read.should == " records: \n"
end

it "should return a list of column names" do
YamlDb::Dump.table_column_names('mytable').should == [ 'a', 'b' ]
end

it "should return the total number of records in a table" do
YamlDb::Dump.table_record_count('mytable').should == 2
end

it "should return a yaml string that contains a table header and column names" do
YamlDb::Dump.stub!(:table_column_names).with('mytable').and_return([ 'a', 'b' ])
YamlDb::Dump.dump_table_columns(@io, 'mytable')
@io.rewind
@io.read.should == <<EOYAML
---
mytable:
columns:
- a
- b
EOYAML
end

it "should return all records from the database and return them when there is only 1 page" do
YamlDb::Dump.each_table_page('mytable') do |records|
records.should == [ { 'a' => 1, 'b' => 2 }, { 'a' => 3, 'b' => 4 } ]
end
end

it "should paginate records from the database and return them" do
# select_all hands back one row per call, so a page size of 1 gives two pages.
ActiveRecord::Base.connection.stub!(:select_all).and_return([ { 'a' => 1, 'b' => 2 } ], [ { 'a' => 3, 'b' => 4 } ])

records = [ ]
YamlDb::Dump.each_table_page('mytable', 1) do |page|
page.size.should == 1
records.concat(page)
end

records.should == [ { 'a' => 1, 'b' => 2 }, { 'a' => 3, 'b' => 4 } ]
end

it "should return dump the records for a table in yaml to a given io stream" do
YamlDb::Dump.dump_table_records(@io, 'mytable')
@io.rewind
@io.read.should == <<EOYAML
records:
- - 1
- 2
- - 3
- 4
EOYAML
end

it "should dump a table's contents to yaml" do
YamlDb::Dump.should_receive(:dump_table_columns)
YamlDb::Dump.should_receive(:dump_table_records)
YamlDb::Dump.dump_table(@io, 'mytable')
end

it "should not dump a table's contents when the record count is zero" do
# A zero count must short-circuit before anything is written.
YamlDb::Dump.stub!(:table_record_count).with('mytable').and_return(0)
YamlDb::Dump.should_not_receive(:dump_table_columns)
YamlDb::Dump.should_not_receive(:dump_table_records)
YamlDb::Dump.dump_table(@io, 'mytable')
end
end

0 comments on commit aa4dec4

Please sign in to comment.