shuuuuun
5/28/2019 - 2:41 AM

dynamodb-export.rb

# usage:
#   1. Install aws-sdk
#     $ echo "source 'https://rubygems.org'\ngem 'aws-sdk'" > Gemfile
#     $ bundle install --path vendor/bundle
#     # or
#     $ gem install aws-sdk
#   2. Setup AWS credentials
#     $ eval $(assume-role hoge)
#     # or
#     $ export AWS_PROFILE=hoge
#     # etc...
#   3. Rewrite TABLE_NAME in the script
#     TABLE_NAME = 'hoge'
#   4. Run the script
#     $ bundle exec ruby dynamodb-export.rb

# note:
#   Before running, calculate the capacity the scan will need and raise the table's capacity accordingly.
#   https://docs.aws.amazon.com/sdkforruby/api/Aws/DynamoDB/Client.html#scan-instance_method
#   https://docs.aws.amazon.com/ja_jp/amazondynamodb/latest/developerguide/GettingStarted.Ruby.04.html
#   https://docs.aws.amazon.com/ja_jp/amazondynamodb/latest/developerguide/Scan.html
#   https://docs.aws.amazon.com/ja_jp/amazondynamodb/latest/developerguide/HowItWorks.ReadWriteCapacityMode.html
#   https://aws.amazon.com/jp/dynamodb/pricing/provisioned/?nc1=f_ls

require 'json'
require 'aws-sdk'

# Name of the DynamoDB table to export. It is also used as the base name of
# the two output files: "<TABLE_NAME>.ndjson" and "<TABLE_NAME>.json".
TABLE_NAME = 'hoge'

Aws.config.update(
  region: 'ap-northeast-1'
)

dynamodb = Aws::DynamoDB::Client.new

# Parameters passed to Client#scan. :exclusive_start_key is added between
# pages so that each request resumes where the previous page stopped.
params = {
  table_name: TABLE_NAME,
}

puts "Scanning #{TABLE_NAME} table."

# Every item returned by the scan so far, across all pages.
items = []

# The block form of File.open closes the NDJSON file even when an exception
# other than the rescued ServiceError escapes the scan loop.
File.open("#{TABLE_NAME}.ndjson", 'w') do |ndjson|
  begin
    loop do
      result = dynamodb.scan(params)
      items.concat(result.items)
      # One JSON document per line, written as each page arrives.
      result.items.each do |item|
        ndjson.puts item.to_json
      end
      # A nil last_evaluated_key ends the loop: there is no further page to request.
      break if result.last_evaluated_key.nil?
      puts 'Scanning for more...'
      params[:exclusive_start_key] = result.last_evaluated_key
    end
  rescue Aws::DynamoDB::Errors::ServiceError => error
    # The scan stops here, but the items fetched before the failure are
    # still reported and written out below.
    puts 'Unable to scan:'
    puts error.message
  end
end

puts "item count is #{items.count}"

# Second output: all collected items as a single JSON array.
File.write("#{TABLE_NAME}.json", items.to_json)
puts "output at #{TABLE_NAME}.json"