jss367
3/29/2017 - 6:27 PM

raw_gutenberg.py

import re
from urllib import request

# Download the plain-text edition of Great Expectations from Project Gutenberg.
url = 'http://www.gutenberg.org/files/1400/1400-0.txt'
# Use the response as a context manager so the underlying HTTP connection is
# closed as soon as the body has been read, rather than being left open.
with request.urlopen(url) as response:
    raw = response.read().decode('utf8')

# Pick a short excerpt to work with.
# NOTE(review): 886 and 1091 are hard-coded character offsets into the
# downloaded text; they will select different content if the hosted file
# changes -- confirm they still point at the intended passage.
text = raw[886:1091]
# Flatten the excerpt: drop carriage returns and turn each newline into a
# space so the passage no longer contains those line-break characters.
text = text.replace('\r', '').replace('\n', ' ')
print(text)