ChakshuGautam
11/17/2018 - 10:08 AM

Download IMDB files and save them in a directory.

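Downloads the IMDb dataset archives from datasets.imdbws.com concurrently (asyncio + aiohttp + aiofiles) and saves each one in the current working directory.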

import asyncio
from contextlib import closing
import aiohttp
import aiofiles

async def download_file(session: aiohttp.ClientSession, url: str):
    """Download ``url`` and save it in the current working directory.

    The output file is named after the last path segment of ``url``. The
    response body is streamed to disk chunk by chunk, so a large archive is
    never held in memory as a whole.

    Args:
        session: Open aiohttp client session used to issue the GET request.
        url: Address of the file to download.

    Returns:
        The name of the file that was written.

    Raises:
        aiohttp.ClientResponseError: If the server answers with any status
            other than 200.
    """
    filename = url.split('/')[-1]
    chunk_size = 64 * 1024

    async with session.get(url) as response:
        # Explicit check rather than ``assert``: assertions are stripped when
        # Python runs with -O, which would let error pages be saved as data.
        if response.status != 200:
            raise aiohttp.ClientResponseError(
                response.request_info,
                response.history,
                status=response.status,
                message=f"unexpected status while downloading {url}",
                headers=response.headers,
            )

        print("Started saving file: ", filename)
        # The context manager closes the file even if a read or write fails.
        async with aiofiles.open(filename, mode='wb') as f:
            async for chunk in response.content.iter_chunked(chunk_size):
                await f.write(chunk)

    return filename


async def download_multiple(session: aiohttp.ClientSession):
    """Fetch every IMDb dataset archive concurrently.

    One ``download_file`` coroutine is created per URL; each file name is
    printed as soon as its download finishes, in completion order.

    Args:
        session: Open aiohttp client session shared by all downloads.

    Returns:
        The list of URLs that were requested.
    """
    urls = [
        'https://datasets.imdbws.com/name.basics.tsv.gz',
        'https://datasets.imdbws.com/title.akas.tsv.gz',
        'https://datasets.imdbws.com/title.basics.tsv.gz',
        'https://datasets.imdbws.com/title.crew.tsv.gz',
        'https://datasets.imdbws.com/title.episode.tsv.gz',
        'https://datasets.imdbws.com/title.principals.tsv.gz',
        'https://datasets.imdbws.com/title.ratings.tsv.gz',
    ]

    # Kept as a real list: as_completed needs a collection of awaitables.
    pending = []
    for address in urls:
        pending.append(download_file(session, address))

    print('Results')
    for finished in asyncio.as_completed(pending):
        print('finished saving:', await finished)

    return urls


async def main():
    """Open a client session, run all downloads and print the URL list."""
    session = aiohttp.ClientSession()
    # Entering the session as a context manager closes it on exit.
    async with session:
        downloaded = await download_multiple(session)
        print('finished:', downloaded)


# Script entry point. asyncio.run() creates a fresh event loop, runs main()
# to completion and closes the loop afterwards; calling
# asyncio.get_event_loop() with no running loop is deprecated.
if __name__ == "__main__":
    asyncio.run(main())