Web Crawling (Test)
import requests
from bs4 import BeautifulSoup
def trade_spider():
url = 'http://www.reddit.com/r/pics/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
source_code = requests.get(url, headers=headers)
plain_text = source_code.text
soup = BeautifulSoup(plain_text, "html.parser")
for link in soup.findAll('a',{'class':'title may-blank '}):
href = link.get('href')
title = link.string
print(href)
print(title[:80])
trade_spider()
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VagrantProjectSettings">
<option name="instanceFolder" value="" />
<option name="provider" value="" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.5.0 (/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/bin/python3.5)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<component name="DependencyValidationManager">
<state>
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</state>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/untitled.iml" filepath="$PROJECT_DIR$/.idea/untitled.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.0 (/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/bin/python3.5)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false">
<file url="PROJECT" charset="UTF-8" />
</component>
</project>