donfanning
8/15/2018 - 1:54 PM

zillabyte notes

zillabyte notes

# sample zillabyte stuff
# I think some of this is python

# nice basic example
# search roughly 34 million web pages for urls containing ".com"
require 'zillabyte' 

# Define the "example1" app: read the "homepages" source, keep only the pages
# whose 'url' value contains ".com", and declare an "example1" sink with a
# single string column named "url".
Zillabyte.app('example1')
  .source('homepages') # 1
  .each do |page|
    # Emit just the URL of each matching page.
    emit url: page['url'] if page['url'].include?('.com')
  end
  .sink do
    name 'example1'
    column 'url', :string
  end



########## this is Ruby, not Python (do/end blocks, @ivars, :symbols)
######### searches through urls internal to amazon.com
# Create the "crawler" app. The name is passed positionally: inside a Ruby
# call, `name = "crawler"` is a plain local assignment (not a keyword
# argument) and would leave a stray top-level `name` variable behind.
app = Zillabyte.app("crawler")

# Custom source that emits one seed tuple and then ends the cycle.
stream = app.source do
  # Reset the emit counter whenever a cycle begins.
  begin_cycle do
    @count = 0
  end

  next_tuple do
    url = "amazon.com"
    p url # debug print of the seed about to be emitted
    emit url: url
    @count += 1

    # @count is 1 right after the first emit, so the cycle ends here.
    end_cycle if @count == 1
  end
end

# Pass the seed stream to the "domain_crawl" component.
# NOTE(review): presumably this crawls URLs inside the seed's domain — confirm
# against the component's documentation.
stream = stream.call_component("domain_crawl")