mileszs
11/29/2011 - 5:35 PM

gistfile1.rb

class Uri < ActiveRecord::Base

  belongs_to  :job
  has_many    :scrapes
  
  # Builds (but does not run) a Typhoeus request for this record's +uri+.
  #
  # A new, unsaved scrape is built on the +scrapes+ association up front; the
  # request's completion handler fills it in from the response and saves it:
  #
  # * +code+ and +time+ are always copied from the response.
  # * On success, +content+ receives the response body.
  # * On timeout, +is_timeout+ is set to 1.
  # * On status code 0 (no HTTP response), +error+ carries the curl error
  #   message.
  # * On any other failure, +error+ carries a generic message.
  #
  # options - Hash handed unchanged to Typhoeus::Request.new (defaults to an
  #           empty hash).
  #
  # Returns the Typhoeus::Request with the completion handler attached.
  # Presumably the caller queues it on a Typhoeus::Hydra -- confirm at call site.
  def get_request_object(options = {})
    # NOTE(review): requires are kept inside the method as in the original;
    # consider loading these once at boot instead.
    require 'rubygems'
    require 'typhoeus'

    request = Typhoeus::Request.new(uri, options)
    scrape = scrapes.new

    # Callback handler: invoked with the response once the request completes.
    request.on_complete = lambda do |response|
      # Recorded for every outcome.
      scrape.code = response.code    # http status code
      scrape.time = response.time    # time in seconds the request took

      if response.success?
        scrape.content = response.body
      elsif response.timed_out?
        scrape.is_timeout = 1
      elsif response.code == 0
        # Interpolation (rather than String#+) so a nil curl error message
        # cannot raise TypeError inside the callback and lose the scrape.
        scrape.error = "Could not get an http response, something's wrong: #{response.curl_error_message}"
      else
        scrape.error = "HTTP request failed, see code."
      end

      # NOTE(review): the result of save is not checked, so a failed save
      # goes unnoticed here.
      scrape.save
    end

    request
  end

...

# In Daemon

          # Queue every prepared request on a single hydra, stamping each
          # uri record with the request time first.
          hydra = Typhoeus::Hydra.new
          uri_requests.each do |entry|
            uri_record = entry[:uri]
            uri_record.requested_at = requested_at
            uri_record.save

            hydra.queue(entry[:request])
          end
          # Blocking call: control does not continue past this line until
          # run returns.
          hydra.run