# Python Scraper snippets
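#1 - get the first child of the first <div> that follows the "APN" text (find_next searches forward in document order, not only among siblings)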
# `soup` is presumably a parsed BeautifulSoup document -- confirm where it is built.
# Any step that finds nothing makes the next step raise, exactly as the one-liner did.
apn = (
    soup.find(string="APN")  # the string in the document that equals "APN"
    .find_next("div")  # first <div> that comes after that string
    .contents[0]  # first child of that <div>
)
#--------------------------------------------------------------------------------------------
#2 - requests headers
# Request headers carrying a desktop-Chrome User-Agent string.
# The value is one string, split across adjacent literals only for line length.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/39.0.2171.95 Safari/537.36"
    ),
}
#--------------------------------------------------------------------------------------------
#3 - txt to python list
# Load urls.txt into a list, one entry per non-empty line.
# - The encoding is pinned so the result does not depend on the platform default.
# - Leading/trailing whitespace (including the newline) is stripped from each line.
# - Lines that are empty after stripping are skipped, so a trailing blank line
#   in the file does not produce an empty-string entry.
with open("urls.txt", "r", encoding="utf-8") as f:
    urls = [line.strip() for line in f if line.strip()]
#--------------------------------------------------------------------------------------------
#4 - list to txt
# Write every item of my_list to urls.txt, one per line; the file is overwritten.
# - str(item) is used instead of "%s" % item: the % operator treats a tuple item
#   as an argument list, which raises or prints only its single element.
# - The encoding is pinned so non-ASCII text does not depend on the platform default.
# - writelines() receives a generator, so all lines are handed over in one call.
with open('urls.txt', 'w', encoding='utf-8') as f:
    f.writelines(str(item) + "\n" for item in my_list)
#--------------------------------------------------------------------------------------------
#5 - export a list of dicts to an .xlsx file
import openpyxl
def export_xls(location, list_data):
    """Write a list of dicts to an .xlsx workbook, one row per dict.

    The active sheet gets a fixed header row ("URL", "APN/PIN") and fixed
    column widths. Every dict in *list_data* is then appended as one row,
    using its values in the dict's own iteration order.

    Args:
        location: Destination passed unchanged to ``workbook.save``.
        list_data: Iterable of dicts. Each dict's values become the cells of
            one row, so the dicts presumably hold the URL first and the
            APN/PIN second -- confirm against the caller.

    Returns:
        None. The workbook is written to *location* as a side effect.
    """
    print("Saving the data into spreadsheets...")

    # Bug fix: only ``import openpyxl`` is in scope, so the class has to be
    # reached through the module; the bare name ``Workbook`` raised NameError.
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    # Freeze everything above and to the left of B2 (header row, URL column).
    sheet.freeze_panes = "B2"

    # (column letter, header text, column width) for each exported column.
    columns = (
        ("A", "URL", 100),
        ("B", "APN/PIN", 20),
    )
    for index, (letter, title, width) in enumerate(columns, start=1):
        sheet.cell(row=1, column=index).value = title
        sheet.column_dimensions[letter].width = width

    # One spreadsheet row per dict; cell order follows the dict's value order.
    for data in list_data:
        sheet.append(list(data.values()))

    workbook.save(location)
    print("Spreadsheet saved!")
#--------------------------------------------------------------------------------------------