# get_all_html_links.py - Cacher Snippet
#
# 3/31/2015 - 12:31 PM
#
# get_all_html_links.py
#
# get_all_html_links.py

import urllib2
import re

# Matches a double-quoted absolute URL whose scheme is http, https, ftp or
# ftps. The scheme alternation is non-capturing so that findall() yields the
# URL strings themselves instead of (url, scheme) tuples.
_LINK_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')


def extract_links(html):
    """Return every double-quoted http(s)/ftp(s) URL found in *html*.

    Args:
        html: The page source to scan, as a string.

    Returns:
        A list of URL strings (without the surrounding quotes), in the order
        they appear in *html*. Duplicates are kept. The list is empty when
        nothing matches.
    """
    return _LINK_PATTERN.findall(html)


def fetch_html(url):
    """Download *url* and return the raw response body.

    Args:
        url: The address to open, passed straight to ``urllib2.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response returns.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``read()`` propagates
        to the caller unchanged.
    """
    website = urllib2.urlopen(url)
    try:
        return website.read()
    finally:
        # The object returned by urllib2.urlopen is not a context manager,
        # so close it explicitly to avoid leaking the connection.
        website.close()


if __name__ == "__main__":
    # "url" is a placeholder: replace it with the address of the page to scan.
    html = fetch_html("url")

    # Collect all the links from the downloaded HTML.
    links = extract_links(html)

    print(links)