ajib6ept
10/19/2019 - 6:39 PM

pyppeteer thread

import asyncio
import time

from pyppeteer import launch
from urllib.parse import urlparse  # only needed by the commented-out screenshot lines

WEBSITE_LIST = [
    'http://www.python.org',
    'http://www.python.org/about/',
    'http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html',
    'http://www.python.org/doc/',
    'http://www.python.org/download/',
    'http://www.python.org/getit/',
    'http://www.python.org/community/',
    'https://wiki.python.org/moin/',
    'http://planet.python.org/',
    'https://wiki.python.org/moin/LocalUserGroups',
    'http://www.python.org/psf/',
    'http://docs.python.org/devguide/',
    'http://www.python.org/community/awards/'
]

start = time.time()


async def fetch(url):
    # Fetch a single URL in its own browser instance (defined but not used below).
    browser = await launch(headless=False, args=['--no-sandbox'])
    page = await browser.newPage()
    await page.goto(url, {'waitUntil': 'load'})
    # await page.screenshot({'path': f'img/{urlparse(url)[1]}.png'})
    await browser.close()


async def fetch_with_q():
    # Worker: keep pulling URLs off the shared list until it is empty.
    # Popping here is safe because asyncio runs everything in a single thread.
    while WEBSITE_LIST:
        url = WEBSITE_LIST.pop()
        browser = await launch(headless=False, args=['--no-sandbox'])
        page = await browser.newPage()
        await page.goto(url, {'waitUntil': 'load'})
        # await page.screenshot({'path': f'img/{urlparse(url)[1]}.png'})
        await browser.close()
        print(url)
        print(WEBSITE_LIST)  # remaining URLs


async def run():
    # Start three workers that all consume from the same URL list.
    tasks = []
    for _ in range(3):
        task = asyncio.ensure_future(fetch_with_q())
        tasks.append(task)

    await asyncio.gather(*tasks)


asyncio.get_event_loop().run_until_complete(run())


print(f'It took {time.time() - start} seconds.')
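
For comparison, here is a minimal sketch of the same worker pattern built on asyncio.Queue instead of popping from a shared list. This is not from the original post: the names worker/main, the default of 3 workers, and headless=True are my own choices (the script above runs with headless=False).

import asyncio

from pyppeteer import launch


async def worker(queue):
    # Hypothetical Queue-based worker: drain URLs until the queue is empty.
    while not queue.empty():
        url = queue.get_nowait()
        browser = await launch(headless=True, args=['--no-sandbox'])
        page = await browser.newPage()
        await page.goto(url, {'waitUntil': 'load'})
        await browser.close()
        queue.task_done()


async def main(urls, workers=3):
    # Load all URLs into a queue, then run several workers concurrently.
    queue = asyncio.Queue()
    for url in urls:
        queue.put_nowait(url)
    await asyncio.gather(*(worker(queue) for _ in range(workers)))


# Usage (urls would be the WEBSITE_LIST from above, before it has been consumed):
# asyncio.get_event_loop().run_until_complete(main(urls))

The behavior is the same as popping from the list, but the queue makes the handoff between producers and workers explicit and leaves the original list untouched.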