bpeterso2000
6/22/2014 - 6:57 AM

Scrape all table headers from a Wikipedia page

Scrape all table headers from a Wikipedia page

import urllib.request

from bs4 import BeautifulSoup

# Prefix shared by the article addresses built below.
WIKIPEDIA_URL = 'http://en.wikipedia.org/wiki/'
# Full address of the article to fetch: the prefix plus the article slug.
TARGET_URL = ''.join((WIKIPEDIA_URL, 'list_of_highest_mountains'))
# One level of indentation (two spaces) used when printing the report.
INDENT = 2 * ' '

# Fetch TARGET_URL and parse it into `soup`.
# The request is sent with a browser-like User-agent header; presumably the
# site refuses urllib's default agent string -- TODO confirm.
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
# The `with` block closes the HTTP response as soon as parsing is done
# instead of leaving the connection open until interpreter exit.
with opener.open(TARGET_URL) as page:
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed (and warns), so the resulting tree can
    # differ between machines. 'html.parser' ships with Python.
    soup = BeautifulSoup(page, 'html.parser')

# Report: the page address first, then each table's header cells in
# document order.
print("PAGE:", TARGET_URL)
# Tables and headers are both numbered from 1 so the two levels of the
# report follow the same counting convention.
for table_num, table in enumerate(soup('table'), start=1):
    print()
    print("{}Table.{}:".format(INDENT, table_num))
    # Calling a tag is Beautiful Soup shorthand for find_all(); this yields
    # every <th> beneath the table.
    for header_num, table_header in enumerate(table('th'), start=1):
        # Join the cell's text fragments with single spaces and trim the
        # ends; untrimmed header text carries the markup's newlines, which
        # would break the one-header-per-line layout.
        header = table_header.get_text(' ', strip=True)
        print("{}{}. {}".format(INDENT * 2, header_num, header))