#!/bin/python3
import requests
from lxml import etree
# Number of <section> slots probed on each page (XPath positions 1..N).
NUM_ITEM_OF_PAGE = 10
# Number of result pages requested (?page=1 .. ?page=N).
NUM_PAGE = 8
# Separator printed after each section slot's text.
LINE = '--------------------------------------------------------------'
def get_content_of_page(etree_html, content_xpath_list):
    """Print the text found in each section slot of one parsed page.

    For every section position from 1 to ``NUM_ITEM_OF_PAGE`` this builds
    the XPath selecting that section's text nodes, records the expression
    in ``content_xpath_list``, evaluates it against ``etree_html`` and
    prints each text fragment with all newlines and spaces removed.
    Fragments that are empty after that cleanup are skipped. The ``LINE``
    separator is printed after every section slot, including slots whose
    XPath matched nothing.

    Args:
        etree_html: Parsed document exposing ``xpath(expression)``; the
            expression ends in ``text()``, so results are expected to be
            strings.
        content_xpath_list: List mutated in place; each evaluated XPath
            expression is appended to it in evaluation order.
    """
    # XPath is 1-indexed, so the placeholder receives positions 1..N.
    xpath_template = (
        '//*[@id="__layout"]/div/div[3]/div[1]/div[2]/div[2]'
        '/section[{}]/div[2]/a/span/span/span/text()'
    )
    for position in range(1, NUM_ITEM_OF_PAGE + 1):
        content_xpath = xpath_template.format(position)
        content_xpath_list.append(content_xpath)
        for fragment in etree_html.xpath(content_xpath):
            cleaned = fragment.replace('\n', '').replace(' ', '')
            # After stripping, the only "nothing left" case is the empty
            # string; a lone '\n' cannot survive the replace above.
            if not cleaned:
                continue
            print(cleaned)
        print(LINE)
def get_all_pages(content_xpath_list):
    """Download every result page and print its content.

    Requests pages 1 through ``NUM_PAGE`` of the work's listing, parses
    each response body with ``etree.HTML`` and hands the parsed tree to
    ``get_content_of_page``.

    Args:
        content_xpath_list: List forwarded unchanged to
            ``get_content_of_page``, which appends to it.

    Raises:
        requests.RequestException: If a request times out, fails at the
            network level, or returns an HTTP error status.
    """
    url_template = (
        "https://www.juzikong.com/works/"
        "ac5e4867-5b49-4ea0-b8cc-c1777522ea5b?page={}"
    )
    for page_number in range(1, NUM_PAGE + 1):
        # Without a timeout requests may wait on a stalled server forever.
        response = requests.get(url_template.format(page_number), timeout=30)
        # Fail loudly on 4xx/5xx instead of scraping an error page.
        response.raise_for_status()
        get_content_of_page(
            etree_html=etree.HTML(response.text),
            content_xpath_list=content_xpath_list,
        )
if __name__ == '__main__':
    # Script entry point: start with an empty XPath log and scrape all pages.
    content_xpath_list = list()
    get_all_pages(content_xpath_list)