Iketaki
1/14/2014 - 7:58 AM

Calculate the MD5 checksum of all files

Calculate the MD5 checksum of all files

#require 'bundler'
#Bundler.require

require 'digest/md5'
require 'find'

#PATH = ENV["HOME"] + "/Downloads/"
# Root directory whose files are checksummed.
PATH = "/"

puts PATH

# Get the number of entries under PATH.
# The count is only used as the denominator of the progress display printed
# while hashing. `find` is invoked with an argument list (no shell), so PATH is
# always passed as one argument even if it contains spaces or shell
# metacharacters. One output line is counted per entry, as `wc -l` would.
puts "Checking number of files at #{PATH}"
n = IO.popen(["find", PATH]) { |io| io.each_line.count }
#n = 0
puts "#{n} files"

puts ""
puts "Start processing"

# Result file in the current working directory, named after the start time,
# e.g. result_20140114075800.csv.
out_file = "result_" + Time.now.strftime("%Y%m%d%H%M%S") + ".csv"

# Process md5
#
# Walk every entry under PATH, compute the MD5 digest of each regular file and
# append one "path,md5" line per file to out_file. Rows are buffered and
# written in batches so the result file is not reopened for every file.
data = []        # buffered rows, each [path, md5]
cnt = 0          # entries visited so far (files and non-files), for progress
batch_size = 1000

# Append all buffered rows to out_file, then empty the buffer.
# The buffer is cleared only after a successful write, so a failed write is
# retried on the next flush.
# NOTE(review): fields are joined with "," without escaping, so a path that
# contains a comma or newline yields extra columns/lines. The md5 is always the
# last comma-separated field of a row.
flush_rows = lambda do
  unless data.empty?
    lines = data.map { |row| row.join(",") + "\n" }.join
    File.open(out_file, "a") { |f| f.write(lines) }
    data.clear
  end
end

Find.find(PATH) do |path|
  # Skip the device tree. Match "/dev" itself (or anything below it) rather
  # than every path that merely begins with those characters, so siblings such
  # as "/developer" are still scanned.
  Find.prune if path == "/dev" || path.start_with?("/dev/")

  begin
    if FileTest.file?(path)
      puts "#{cnt}/#{n} #{path}"
      data << [path, Digest::MD5.file(path).hexdigest]

      # write data every 1000 files
      flush_rows.call if data.length >= batch_size
    end
  rescue StandardError => e
    # Best effort: unreadable files (permissions, files vanishing mid-scan, ...)
    # must not abort the whole scan, but the failure is reported on stderr
    # instead of being silently dropped.
    warn "Error while processing #{path}: #{e.class}: #{e.message}"
  end

  cnt += 1
end

# Write the final partial batch; without this the last (< 1000) rows are lost.
flush_rows.call