mutegdp
10/15/2019 - 10:08 AM

Python Scraper

Python Scraper

#1 - get the data from the <div> that follows a label
# Find the node whose string is "APN", move to the next <div> after it, and
# take that div's first child as the value.
# NOTE(review): assumes `soup` is a BeautifulSoup object created earlier and
# that the "APN" label is present on the page - confirm against the caller.
apn_label = soup.find(string="APN")
apn_div = apn_label.find_next("div")
apn = apn_div.contents[0]
#--------------------------------------------------------------------------------------------
#2 - requests headers
# Request headers carrying a desktop Chrome-on-macOS User-Agent string.
# The value is assembled from its individual product tokens, joined by spaces.
headers = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0",
            "(Macintosh; Intel Mac OS X 10_10_1)",
            "AppleWebKit/537.36",
            "(KHTML, like Gecko)",
            "Chrome/39.0.2171.95",
            "Safari/537.36",
        )
    ),
}
#--------------------------------------------------------------------------------------------
#3 - txt to python list
# Load urls.txt into `urls`: one entry per line of the file, with leading and
# trailing whitespace (including the newline) stripped. Blank lines in the
# file therefore become empty strings in the list.
with open("urls.txt", "r") as url_file:
    urls = list(map(str.strip, url_file))
#--------------------------------------------------------------------------------------------
#4 - list to txt
# Write every element of `my_list` to urls.txt, one per line (the file is
# truncated first because of mode 'w').
# The item is wrapped in a 1-tuple before %-formatting: with a bare
# `"%s\n" % item`, an item that is itself a tuple would be taken as the
# argument list and either raise TypeError or lose its tuple form.
with open('urls.txt', 'w') as f:
    f.writelines("%s\n" % (item,) for item in my_list)
#--------------------------------------------------------------------------------------------
#5 - list of dicts into xlsx
import openpyxl
def export_xls(location, list_data):
    """Save a list of dicts as an .xlsx workbook.

    Row 1 receives the fixed headers "URL" and "APN/PIN". Each dict in
    ``list_data`` is then appended as one row, its values written in the
    dict's own iteration order, so the dict items should line up with the
    header columns.

    Args:
        location: Destination passed straight to ``workbook.save``.
        list_data: Iterable of dicts, one per spreadsheet row.
    """
    print("Saving the data into spreadsheets...")
    # Only the `openpyxl` module is imported at file level, so the bare name
    # `Workbook` was undefined (NameError); reach it through the module.
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.freeze_panes = "B2"

    # Table columns, based on the dict items: (column letter, title, width).
    header_columns = (
        ("A", "URL", 100),
        ("B", "APN/PIN", 20),
    )
    for index, (letter, title, width) in enumerate(header_columns, start=1):
        sheet.cell(row=1, column=index).value = title
        sheet.column_dimensions[letter].width = width

    for data in list_data:
        sheet.append(list(data.values()))

    workbook.save(location)
    print("Spreadsheet saved!")
#--------------------------------------------------------------------------------------------