# Recursive scraping tool
from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.utils.markup import remove_tags
from scrapy.cmdline import execute
class MySpider(Spider):
    """Spider that prints the tag-free paragraph text of each fetched page.

    The crawl starts from the URLs in ``start_urls``; every response is
    handed to :meth:`parse`, which writes the page's paragraph text to
    standard output.
    """

    # Spider setup.
    name = "wiki"
    allowed_domains = ["wikipedia.org"]
    # Every URL listed here is requested when the crawl starts.
    start_urls = ["http://en.wikipedia.org/wiki/Asia"]

    def parse(self, response):
        """Print the text of all ``<p>`` elements in ``response``.

        The serialized ``<p>`` elements are concatenated without a
        separator, their markup is removed with ``remove_tags`` and the
        resulting text is printed once.

        Args:
            response: the downloaded page; it must support ``xpath()``.
        """
        paragraphs = response.xpath("//p").extract()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(remove_tags("".join(paragraphs)))
### main
if __name__ == "__main__":
    # Run the spider through Scrapy's command-line entry point, passing the
    # same arguments a shell invocation of "scrapy runspider" would receive.
    scrapy_argv = ["scrapy", "runspider", "myspider.py"]
    execute(scrapy_argv)