taizilongxu
9/3/2016 - 10:03 AM

scrapy常用

scrapy常用

# 命令
scrapy startproject tutorial
scrapy crawl dmoz  # spider name: dmoz
scrapy shell 'http://www.example.com'  # enter shell

# start
start_urls

# first start: start_requests() produces the initial requests
def start_requests(self):
    """Placeholder for the spider's first entry point.

    Presumably Scrapy calls this to obtain the initial requests of a
    crawl -- confirm against the Scrapy version in use. This stub does
    no work and returns ``None``; replace the body with real request
    generation.
    """
    return None
  
# second start: parse() handles the responses to those requests
def parse(self, response):
    """Placeholder for the spider's response handler.

    Presumably Scrapy passes each downloaded response here when a
    request has no explicit callback -- confirm against the Scrapy
    version in use. This stub ignores ``response`` and returns ``None``;
    replace the body with real extraction logic.
    """
    return None

# return a POST request (form fields are passed via formdata)
yield scrapy.http.FormRequest(url, callback=self.parse_music, headers=headers, formdata=data)
# return a GET request
yield scrapy.Request(url, callback=self.parse_playlist)
  
# selector
response.xpath('//*[@id="m-pl-container"]/li/div/a/@href').extract()