bhavika
1/8/2016 - 8:27 AM

Comparing the US & India Netflix collections | Excel file: https://goo.gl/JZMb3C

Comparing the US & India Netflix collections | Excel file: https://goo.gl/JZMb3C

from urllib.request import urlopen
from bs4 import BeautifulSoup as bs
import csv as csv

# Page to scrape.
url = 'http://www.finder.com/in/netflix-india-vs-netflix-us-titles-list'

# Download the raw page; the with-block closes the HTTP response as soon as
# the body has been read instead of leaving the connection open.
with urlopen(url) as response:
    html = response.read()

soup = bs(html, "lxml")

# The rows of interest live in the <table> whose id is "tablepress-9".
table = soup.find('table', id='tablepress-9')
if table is None:
    # find() returns None when nothing matches; fail here with a clear
    # message rather than with an AttributeError further down the script.
    raise RuntimeError("table 'tablepress-9' not found at " + url)

# The first CSV row holds the column names for the values read from the table.
headers = ['Title', 'Year', 'Type', 'India', 'USA']
data = [headers]

# Extract the stripped text of every <td>; the first <tr> is skipped
# (presumably the table's own header row).
for row in table.find_all('tr')[1:]:
    cells = [cell.text.strip() for cell in row.find_all('td')]
    # Empty cells are kept so each value stays under its own header; only
    # rows without any text (e.g. rows made of <th> cells) are dropped, which
    # avoids blank lines in the output.
    if any(cells):
        data.append(cells)
    
# Write the collected rows to a CSV file (the ./data directory must exist).
# UTF-8 is requested explicitly so non-ASCII text does not depend on the
# platform's default encoding; newline='' leaves line endings to the csv
# writer, which is configured to emit '\n'.
with open('./data/netflix.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',', lineterminator='\n')
    writer.writerows(data)