jmquintana79
12/14/2015 - 2:11 AM

Scrape the text or an attribute of every element matching a selected HTML tag on a web page

Scrape the text or an attribute of every element matching a selected HTML tag on a web page

import csv
import requests 
from bs4 import BeautifulSoup


def parse_text(stags, soup):
    """Collect the text of every element of ``soup`` matching ``stags``.

    Args:
        stags: Selector passed unchanged to ``soup.find_all`` (the script at
            the bottom of this file passes a tag name such as ``"p"``).
        soup: Parsed document exposing ``find_all``; the script at the bottom
            of this file passes a ``BeautifulSoup`` object.

    Returns:
        A list holding the ``.text`` of each match, in the order returned by
        ``find_all``. Items whose text cannot be read are skipped after a
        warning is printed, so the list may be shorter than the match count.
    """
    found = []

    # search tags into html code
    for item in soup.find_all(stags):
        try:
            found.append(item.text)
        except Exception as e:
            # Best effort: one unreadable item must not abort the whole scrape.
            # ``except ... as`` and ``print(...)`` with a single argument work
            # on both Python 2.6+ and Python 3.
            print("WARNING: %s" % str(e))

    return found


def parse_att(stags, sattribute, soup):
    """Collect one attribute from every element of ``soup`` matching ``stags``.

    Args:
        stags: Selector passed unchanged to ``soup.find_all`` (the script at
            the bottom of this file passes a tag name such as ``"img"``).
        sattribute: Attribute name passed to each match's ``get`` method
            (for example ``"src"``).
        soup: Parsed document exposing ``find_all``; the script at the bottom
            of this file passes a ``BeautifulSoup`` object.

    Returns:
        A list holding whatever ``item.get(sattribute)`` returns for each
        match, in the order returned by ``find_all``. The value is appended
        as-is, so a match lacking the attribute contributes whatever ``get``
        yields for a missing key (``None`` for BeautifulSoup tags). Items
        whose lookup raises are skipped after a warning is printed.
    """
    found = []

    # search tags into html code
    for item in soup.find_all(stags):
        try:
            found.append(item.get(sattribute))
        except Exception as e:
            # Best effort: one failing item must not abort the whole scrape.
            # ``except ... as`` and ``print(...)`` with a single argument work
            # on both Python 2.6+ and Python 3.
            print("WARNING: %s" % str(e))

    return found




if __name__ == "__main__":

    # Demo: download one page and print what the two helpers extract from it.
    # ``print(...)`` with a single argument behaves the same on Python 2 and 3.

    # url to parse
    url = "http://www.nytimes.com/"
    print(url)

    # build Beautiful object
    # A timeout keeps an unresponsive server from hanging the script forever.
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.content, "lxml")

    ## parse text of "p" html tag
    print(parse_text("p", soup))

    ## parse "src" attribute of "img" html tag
    print(parse_att("img", "src", soup))