falkov
10/1/2017 - 5:53 AM

nav_2018

#!/usr/bin/env python3

import os
import sys
import time
import requests
import lxml.html as lxhtml
import logging
import openpyxl

# Write the log file next to this script, independent of the current working
# directory.  abspath() is required: before Python 3.9 a script started as
# `python nav_2018.py` sees a relative __file__, dirname() then returns "" and
# a naive "<dirname>/nav_2018.log" would point at the filesystem root.
logging.basicConfig(format="%(asctime)s  %(filename)s:%(lineno)d  %(message)s",
                    datefmt="%Y-%m-%d  %H:%M.%S", level=logging.DEBUG,
                    filename=os.path.join(
                        os.path.dirname(os.path.abspath(__file__)),
                        'nav_2018.log'))


def read_data_from_site():
    """Fetch the three counters shown on the https://2018.navalny.com page.

    The page is downloaded, every ``span`` below the elements matching
    ``.branches-map__statistic--desktop .branches-map__value`` is collected
    in document order, and the text of the first three is returned with all
    whitespace removed (presumably the page prints digit groups separated by
    spaces -- verify against the live markup).

    Returns:
        A ``(podpisi, volonteers, headquoters)`` tuple of strings; judging by
        the names these are signatures, volunteers and headquarters.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with a 4xx/5xx status.
        IndexError: fewer than three counter spans were found.
        ValueError: one of the first three spans carries no text.
    """
    url = "https://2018.navalny.com"
    common_path = ".branches-map__statistic--desktop .branches-map__value"

    # Without a timeout a stalled connection would block the job forever.
    page = requests.get(url, timeout=30)
    # Refuse to parse an error page as if it were the real one.
    page.raise_for_status()
    parser = lxhtml.fromstring(page.text)

    spans = [span
             for elem in parser.cssselect(common_path)
             for span in elem.cssselect("span")]

    if len(spans) < 3:
        message = f'expected at least 3 counters on {url}, found {len(spans)}'
        logging.error(message)
        raise IndexError(message)

    values = []
    for position, span in enumerate(spans[:3]):
        # lxml reports .text as None when the element has no leading text.
        if span.text is None:
            message = f'counter #{position} on {url} has no text'
            logging.error(message)
            raise ValueError(message)
        # split()/join() drops every whitespace character inside the number.
        values.append("".join(span.text.split()))

    podpisi, volonteers, headquoters = values

    logging.info(f' read: {podpisi}, {volonteers}, {headquoters}')
    return podpisi, volonteers, headquoters


def write_data_to_xls(sheet_name, podpisi, volonteers, headquoters):
    """Append one timestamped row of counters to ``xls/nav_2018.xlsx``.

    The workbook is looked up in the ``xls`` directory next to this script.
    The target row is the first row, starting at row 2, whose column B is
    empty.  The current local time goes to column A and the three counters
    (converted to ``int``) go to columns B, E and H; the workbook is then
    saved in place.

    Args:
        sheet_name: Name of the worksheet to append to.
        podpisi: Counter written to column B (anything ``int()`` accepts).
        volonteers: Counter written to column E.
        headquoters: Counter written to column H.

    Raises:
        SystemExit: with code 1 when the workbook file is missing, cannot be
            loaded, or does not contain ``sheet_name``.
        ValueError: a counter cannot be converted to ``int``.
    """
    # abspath() guards against a relative __file__, which would otherwise
    # turn the path into "/xls/nav_2018.xlsx".
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'xls', 'nav_2018.xlsx')

    if not os.path.exists(path):
        logging.error(f'No file: "{path}"')
        # sys.exit() instead of the site-provided exit(), which is absent
        # under `python -S` and in frozen builds.
        sys.exit(1)

    try:
        book = openpyxl.load_workbook(filename=path)
    except Exception as e:
        # Deliberately broad: any failure to open the workbook is fatal here.
        logging.error(f'Error in "book = openpyxl.load_workbook(filename=path)" - {e}')
        sys.exit(1)

    # sheetnames / book[name] replace the deprecated get_sheet_names() and
    # get_sheet_by_name().
    sheet_names = book.sheetnames

    if sheet_name not in sheet_names:
        logging.error(f'No sheet "{sheet_name}" in "{sheet_names}"')
        sys.exit(1)

    sheet = book[sheet_name]

    # Convert up front so that bad input fails before any cell is modified.
    podpisi_value = int(podpisi)
    volonteers_value = int(volonteers)
    headquoters_value = int(headquoters)

    # First row (from 2) with an empty column B.  No fixed upper bound: a
    # capped scan would silently overwrite its last row once the sheet fills.
    row = 2
    while sheet[f"B{row}"].value is not None:
        row += 1

    sheet[f"A{row}"].value = time.strftime("%d/%m/%Y %H:%M")
    sheet[f"B{row}"].value = podpisi_value
    sheet[f"E{row}"].value = volonteers_value
    sheet[f"H{row}"].value = headquoters_value

    book.save(path)
    book.close()

    logging.info(f'write: {podpisi}, {volonteers}, {headquoters} in row {row}')


def main():
    """Read the counters from the site and append them to the chosen sheet.

    The first command-line argument selects the worksheet and must be
    ``"hour"`` or ``"day"``.  With a missing or unknown argument nothing is
    fetched or written; the problem is logged and the function returns.
    """
    logging.info('--------------------')

    mode = sys.argv[1] if len(sys.argv) > 1 else None

    # Validate before any I/O so that a typo in the mode does not trigger a
    # network request whose result would just be thrown away.
    if mode not in ("hour", "day"):
        logging.error(f'Expected argument "hour" or "day", got: {mode!r}')
        return

    podpisi, volonteers, headquoters = read_data_from_site()
    # The mode doubles as the worksheet name.
    write_data_to_xls(mode, podpisi, volonteers, headquoters)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()