dollschasingmen
5/26/2017 - 6:47 PM

stripped down sync_list w/ timings

stripped down sync_list w/ timings

# Times how long it takes to look up a RedshiftIncrementalLoadFile row for
# every S3 object under a prefix, issuing one query per object.
#
# Prints the number of lookups performed, then the elapsed time in seconds,
# and returns the looked-up rows so callers can inspect them.
#
# @param table [String] value matched against +destination_table+
# @param bucket_name [String] name of the S3 bucket to list
# @param prefix [String] key prefix passed to the bucket listing
# @return [Array] one entry per listed S3 object, in listing order; each
#   entry is whatever +.first+ yields for that lookup (presumably nil when
#   no row matches -- confirm against the model)
def sync_list(table, bucket_name, prefix)
  aws_resource = Aws::S3::Resource.new
  # Monotonic clock: unaffected by wall-clock adjustments during the run.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  bucket = aws_resource.bucket(bucket_name)
  # One lookup per object is kept deliberately: it is the cost being timed.
  rows = bucket.objects(prefix: prefix).map do |obj|
    s3_path = "s3://#{bucket_name}/#{obj.key}"
    LumosEtl::RedshiftIncrementalLoadFile
      .where(destination_table: table, file_name: s3_path)
      .first
  end
  puts rows.size

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at # in seconds
  puts elapsed

  # Hand the rows back; the method used to end on `puts`, so callers that
  # assigned its result always received nil.
  rows
end


# Run the timing for each table / bucket / prefix combination in turn,
# keeping each call's result in its own local.
adjust_rows, impressions_rows, visitor_rows = [
  ['adjust.events', 'lumos-adjust', ''],
  ['appnexus.impressions', 'lumos-appnexus', 'log-level-data'],
  ['events.visitor', 'lumos-partitioned-events-user-production', 'yyyy=2017']
].map { |table, bucket_name, prefix| sync_list(table, bucket_name, prefix) }