# JONCHAN-CN
# 10/3/2019 - 6:52 PM
#
# Scrape the Iciba daily sentence.

import requests
import datetime


def get_news1(date, timeout=10):
    """Fetch the Iciba daily sentence for one date.

    Args:
        date: Date string placed in the ``title`` query parameter
            (the script passes ``YYYY-MM-DD`` values).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        A ``(content, translation)`` tuple taken from the ``content`` and
        ``translation`` keys of the JSON response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON payload lacks an expected key.
    """
    url = f"http://sentence.iciba.com/?&c=dailysentence&m=getdetail&title={date}"
    r = requests.get(url, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    # Decode the body once and read both fields from the same payload.
    payload = r.json()
    return payload['content'], payload['translation']


def datelist(start, end):
    """Return every date from *start* to *end*, inclusive, as ISO strings.

    Args:
        start: ``(year, month, day)`` tuple for the first date.
        end: ``(year, month, day)`` tuple for the last date.

    Returns:
        A list of ``YYYY-MM-DD`` strings in ascending order. The list is
        empty when *start* is later than *end*.

    Raises:
        ValueError: If either tuple is not a valid calendar date.
    """
    start_date = datetime.date(*start)
    end_date = datetime.date(*end)
    one_day = datetime.timedelta(days=1)

    result = []
    curr_date = start_date
    # Compare with <= rather than !=, so a reversed range ends immediately
    # instead of stepping forward until the date type overflows.
    while curr_date <= end_date:
        result.append(curr_date.isoformat())
        curr_date += one_day
    return result

# Collect the daily sentence for every date in the range, appending a
# newline entry after each sentence.
sect = []
date_list = datelist((2017, 7, 28), (2019, 8, 3))
for date in date_list:
    try:
        # Only the sentence text is kept; the translation is not written out.
        a, _ = get_news1(date)
    except Exception:
        # Best-effort scrape: report the failed date and move on. Catching
        # Exception rather than using a bare except lets Ctrl-C stop the run.
        print(f'{date} fail')
    else:
        sect.append(a)
        sect.append('\n')
        print(f'{date} done')

# Write all collected sentences to the output file in a single call.
with open('./iciba.txt', 'w', encoding='utf-8') as f:
    f.writelines(sect)
    print('done')