# Stripped-down sync_list with timings
# Looks up the load-file record for every S3 object under a prefix and
# reports how long the listing plus lookups took.
#
# Prints two lines to stdout: the number of objects visited, then the
# elapsed time in seconds.
#
# @param table [String] value matched against `destination_table`
# @param bucket_name [String] name of the S3 bucket to list
# @param prefix [String] key prefix passed to the bucket listing ('' lists everything)
# @return [Array] one entry per listed object, in listing order: the first
#   LumosEtl::RedshiftIncrementalLoadFile matching the table and s3 path
#   (presumably nil when no record matches -- confirm against the model)
def sync_list(table, bucket_name, prefix)
  aws_resource = Aws::S3::Resource.new
  # Monotonic clock: unaffected by wall-clock adjustments while measuring.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  bucket = aws_resource.bucket(bucket_name)

  # One lookup is issued per listed object; that per-object cost is part of
  # what is being timed here.
  rows = bucket.objects(prefix: prefix).map do |obj|
    s3_path = "s3://#{bucket_name}/#{obj.key}"
    LumosEtl::RedshiftIncrementalLoadFile.where(destination_table: table, file_name: s3_path).first
  end

  puts rows.size
  delta = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at # in seconds
  puts delta

  # Hand the rows back; callers assign the result of this method.
  rows
end
# Run sync_list once per (table, bucket, prefix) combination, in order,
# keeping each call's return value in its own local.
adjust_rows, impressions_rows, visitor_rows = [
  ['adjust.events', 'lumos-adjust', ''],
  ['appnexus.impressions', 'lumos-appnexus', 'log-level-data'],
  ['events.visitor', 'lumos-partitioned-events-user-production', 'yyyy=2017']
].map { |table, bucket_name, prefix| sync_list(table, bucket_name, prefix) }