Download Shingeki no Kyojin from http://readsnk.com/
from bs4 import BeautifulSoup
import requests
import os
import shutil
# Taken from http://stackoverflow.com/questions/23793987/python-write-file-to-directory-doesnt-exist
# Taken from http://stackoverflow.com/a/600612/119527
def mkdir_p(path):
try:
os.makedirs(path)
except OSError as exc: # Python >2.5
if os.path.isdir(path):
pass
else:
raise
def safe_open_w(path):
''' Open "path" for writing, creating any parent directories as needed.
'''
mkdir_p(os.path.dirname(path))
return open(path, 'w+b')
URL = 'http://readsnk.com/'
r = requests.get(URL)
responseBody = r.text
soup = BeautifulSoup(responseBody, 'html.parser')
chapterList = soup.find('ul', {'class': 'chapters-list'}).find_all('a')
for chapter in chapterList:
chapterLink = chapter['href']
chapterNumber = chapterLink.partition('chapter-')[2][:-1]
r = requests.get(chapterLink)
chapterBody = r.text
soup = BeautifulSoup(chapterBody, 'html.parser')
imageList = soup.find_all('img', {'class': 'pages__img'})
counterImage = 1
for image in imageList:
imageLink = image['src'].rstrip()
imageNumber = '/' + str(format(counterImage, '03'))
# ext = '.' + imageLink.split('.')[-1]
ext = '.jpg'
imageLocation = 'SNK/' + chapterNumber + imageNumber + ext
# print imageLocation
try:
r = requests.get(imageLink, stream=True)
if r.status_code == 200:
with safe_open_w(imageLocation) as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
except:
pass
counterImage += 1