falsecz
5/27/2015 - 12:05 PM

hbase shell backup script

hbase shell backup script

# Destination URI handed to ExportSnapshot's -copy-to option.
# NOTE(review): the credentials in this URI look like placeholders; confirm how
# real keys are supplied before relying on this value.
@s3 = "s3a://XXXX:XXXX@BUCKET/"

# Comma-separated list of the HBase jars, passed to the export job via
# --libjars. Built with Dir.glob so no subshell is spawned and the list carries
# no trailing comma; sorted to keep the order the old `ls` call produced.
@libjars = Dir.glob("/opt/hbase/lib/*.jar").sort.join(",")

# Tables whose name matches any of these patterns are skipped by the backup.
# The patterns are anchored so that only real prefixes/suffixes count: an
# unanchored /test_.*/ would also skip "latest_events", and /.*_temp/ would
# skip "users_temperature". Prefix patterns tolerate a leading "namespace:".
@ignore = [
    /\A(?:[^:]+:)?zipkin\./i,
    /\A(?:[^:]+:)?test_/i,
    /_temp\z/i,
    /tmp\z/i,
    /_test\z/i,
    /_old\z/i
]

# Value for ExportSnapshot's -mappers option; kept as a String because it is
# passed through as a command-line argument.
@mappers = "6"

include Java

java_import org.apache.hadoop.hbase.HBaseConfiguration
java_import org.apache.hadoop.hbase.client.HBaseAdmin
java_import org.apache.hadoop.hbase.snapshot.ExportSnapshot
java_import org.apache.hadoop.util.ToolRunner

# Configuration object shared by the admin client and the export tool below.
@conf = HBaseConfiguration.create

# Admin client used for listing tables and for creating, listing and deleting
# snapshots.
@admin = HBaseAdmin.new(@conf)

# Deletes every snapshot whose name matches ".*-S3BACKUP-.*", i.e. snapshots
# that use the naming scheme produced by this script's backup step.
# Prints one line per snapshot before deleting it.
def cleanup
    @admin.listSnapshots(".*-S3BACKUP-.*").to_a.each do |snapshot|
        name = snapshot.name
        puts "-----> Cleaning up old snapshot #{name}"
        @admin.deleteSnapshot(name.to_java_bytes)
    end
end

# Returns the names (Strings) of all tables reported by the admin client,
# minus those whose name matches at least one pattern in @ignore.
# The order of the returned names follows the order of the listing.
def tables_to_backup
    names = @admin.listTables(".*").to_a.map do |descriptor|
        descriptor.getTableName.getNameAsString.to_s
    end

    names.reject do |name|
        @ignore.any? { |pattern| pattern.match(name) }
    end
end
# Backs up one table: takes a snapshot named "<table>-S3BACKUP-<timestamp>",
# exports it to @s3 with ExportSnapshot (run through ToolRunner with @conf),
# and then deletes the snapshot again.
#
# table - name of the table to back up (String).
#
# Raises RuntimeError when the export tool returns a non-zero exit code, so a
# failed export is no longer reported as a successful backup. The snapshot is
# deleted in either case, including when the export itself raises.
def backup(table)
    snapshot_name = "#{table}-S3BACKUP-#{Time.now.strftime('%Y%m%d_%H%M%S')}"

    puts "-----> Creating snapshot #{snapshot_name}"
    @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes)

    # The ensure only starts once the snapshot exists; a failed snapshot call
    # above must not trigger a delete of something that was never created.
    begin
        puts "-----> Export snapshot #{snapshot_name}"
        args = ["--libjars", @libjars, "-snapshot", snapshot_name, "-copy-to", @s3, "-mappers", @mappers]
        exit_code = ToolRunner.run(@conf, ExportSnapshot.new, args.to_java(:String))
        unless exit_code == 0
            raise "Export of snapshot #{snapshot_name} failed with exit code #{exit_code}"
        end
    ensure
        puts "-----> Deleting snapshot #{snapshot_name}"
        @admin.deleteSnapshot(snapshot_name.to_java_bytes)
    end
end


# Remove snapshots matching this script's naming scheme before creating new ones.
cleanup

table_names = tables_to_backup
puts "-----> Tables to backup"
puts table_names

puts "-----> Starting backup"

# Tables are processed one after another, in listing order.
table_names.each { |name| backup(name) }

puts "-----> Tada"

exit 0