# Author: yankov
# Posted: 3/2/2012 - 8:08 AM
#
# File: gistfile1.rb

require 'nokogiri'
require 'eventmachine'
require 'em-http-request'
require 'em-redis'

# Number of search-result pages to request.
PAGES = 140

# Requests PAGES search-result pages concurrently inside the EventMachine
# reactor. For every link wrapping an <img class="profilePic"> on a page, it
# stores an entry in the Redis hash "people": the field is the image src
# rewritten from the thumbnails host/path to the pictures host/path, and the
# value is the link's href.
#
# The reactor is never stopped here, so the script keeps running until it is
# interrupted.
EM.run {

  @redis = EM::Protocols::Redis.connect

  PAGES.times do |page_num|

    url = "http://www.match.com/search/searchSubmit.aspx?by=radius&lid=226&cl=1&gc=2&tr=1&lage=27&uage=29&ua=29&pc=94121&dist=10&po=1&oln=0&do=2&q=woman,men,27,29,1915822078&st=quicksearch&pn=#{page_num}&rn=4"
    # NOTE(review): URI.escape was removed in Ruby 3.0; kept as-is for the
    # Ruby version this script was written against -- confirm before upgrading.
    http = EM::HttpRequest.new(URI.escape(url)).get :head => {'cookie' => "SECU=YOUR_SESSION_COOKIE;"}

    http.callback {

      # page_num is zero-based; report it one-based for readability.
      p "parsing page #{page_num + 1}"
      doc = Nokogiri::HTML(http.response)

      # Select the parent element of each profile picture (the enclosing link).
      doc.xpath("//img[@class='profilePic']/..").each do |link|
        img_src = link.xpath("img/@src").to_s.gsub('sthumbnails.match.com/sthumbnails', 'pictures.match.com/pictures')
        @redis.hset("people", img_src, link['href'])
      end

    }

    # Report failed requests instead of dropping them silently.
    http.errback {
      p "failed to fetch page #{page_num + 1}: #{http.error}"
    }

  end
}