ajib6ept
10/19/2019 - 12:19 PM

Intercept AJAX JSON responses in Pyppeteer



import asyncio
from pyppeteer import launch


async def intercept_network_response(response):
    """Print details of an intercepted response when it carries JSON.

    Meant to be registered as a handler for the page's ``response`` event.
    Responses whose ``content-type`` header does not mention
    ``application/json`` are ignored.

    Args:
        response: The intercepted response. It must expose ``headers``,
            ``url``, ``status``, ``request`` (with ``method`` and
            ``headers``) and awaitable ``json()`` / ``text()`` methods.
    """
    # Local import: the ``except`` clause below needs this name and the
    # module does not import ``json`` at the top, so a decode failure used to
    # surface as a NameError instead of reaching the fallback branch.
    from json import JSONDecodeError

    # In this example, we care only about responses returning JSON.
    if "application/json" not in response.headers.get("content-type", ""):
        return

    # Print some info about the response and the request that produced it.
    print("URL:", response.url)
    print("Method:", response.request.method)
    print("Response headers:", response.headers)
    print("Request Headers:", response.request.headers)
    print("Response status:", response.status)

    # Print the content of the response.
    try:
        # await response.json() returns the response as a Python object.
        print("Content: ", await response.json())
    except JSONDecodeError:
        # The server advertised JSON but the body did not parse; fall back
        # to the raw text so the payload can still be inspected.
        print("Failed to decode JSON from", await response.text())


async def intercept_request(req):
    """Filter an intercepted request: drop it or let it through.

    Requests for images, media, event sources and websockets are aborted,
    as is any request whose URL contains ``jquery.min.js``. Every other
    request continues unchanged.
    """
    blocked_types = ('image', 'media', 'eventsource', 'websocket')
    should_block = (
        req.resourceType in blocked_types
        or 'jquery.min.js' in req.url
    )
    # Pick the single action to perform, then await it once.
    action = req.abort if should_block else req.continue_
    await action()


async def main():
    """Open a visible browser, load the demo site and wait for Enter.

    Request interception is enabled so that ``intercept_request`` decides
    which requests are aborted. The browser stays open until the user
    presses Enter in the terminal, and is then closed.
    """
    browser = await launch({'headless': False})
    try:
        page = await browser.newPage()
        await page.setRequestInterception(True)
        page.on('request', intercept_request)

        # Uncomment to also print every JSON response the page receives:
        # page.on('response', intercept_network_response)

        await page.goto('http://books.toscrape.com/')

        # Wait for Enter on a worker thread. Calling input() directly would
        # block the event loop, so requests issued after the initial load
        # would never reach the interception handler and would hang.
        await asyncio.get_event_loop().run_in_executor(None, input)
    finally:
        # Always shut the browser down, even if navigation fails.
        await browser.close()


# Script entry point: drive main() to completion on the default event loop.
loop = asyncio.get_event_loop()
loop.run_until_complete(main())