andy0130tw
4/14/2015 - 5:11 PM

HSNU Calendar Scraper

HSNU Calendar Scraper

#!python3
import requests
import datetime
import json

# URL template for the calendar's admin-ajax endpoint (action
# "hsnu_cal_get_events"). The single %s placeholder is the "date" query
# parameter; the script below fills it with dates in YYYY-MM-DD form.
URL = "http://www.hs.ntnu.edu.tw/hsnuwp/wp-admin/admin-ajax.php?date=%s&action=hsnu_cal_get_events"

# One requests.Session shared by every call to fetchEvents.
session = requests.Session()
def fetchEvents(dateStr, timeout=30):
	"""Fetch the calendar events for one date and return the decoded JSON.

	dateStr is substituted into the %s placeholder of URL, so it may be a
	string or any object whose str() is the wanted date text.
	timeout is the number of seconds handed to requests as the request
	timeout; without one a stalled server would block the scraper forever.

	Raises requests.exceptions.HTTPError for a 4xx/5xx response,
	requests.exceptions.Timeout when the server does not answer in time,
	and ValueError (or a subclass) when the body is not valid JSON.
	"""
	response = session.get(URL % dateStr, timeout=timeout)
	# Fail loudly on an HTTP error instead of trying to parse an error page.
	response.raise_for_status()
	return response.json()

# How many consecutive past days to fetch.
DAYS_BACK = 270

s = []  # every record fetched so far, across all dates
curr = datetime.date.today()
for x in range(DAYS_BACK):
	# The date is stepped back before fetching, so the first date requested
	# is yesterday and today itself is never fetched.
	# NOTE(review): confirm that skipping today is intended.
	curr -= datetime.timedelta(days=1)
	dateStr = curr.strftime("%Y-%m-%d")
	print("Fetching date = " + dateStr)

	# Request exactly the string that was just logged.
	v = fetchEvents(dateStr)

	for rec in v:
		try:
			print("%s | %s | %s" % (rec["event_id"], rec["date"], rec["description"]))
		except (KeyError, UnicodeEncodeError):
			# Fall back for a record that has no "description" key, or whose
			# text the console cannot encode. Anything else (Ctrl-C, real
			# bugs) is no longer swallowed.
			print("%s | %s | N/A" % (rec["event_id"], rec["date"]))

	s += v
	# Progress line: records for this date / running total.
	print("Rec: " + str(len(v)) + " / " + str(len(s)))
	print("")

# The with-block guarantees the file is flushed and closed, even on error.
with open("data", "w", encoding="utf-8") as f:
	f.write(json.dumps(s))
print("EOF!")