checkaayush
2/20/2017 - 2:10 PM

Exports MySQL tables to .csv

Exports MySQL tables to .csv

"""Exports MySQL tables to CSV"""

import os

import pymysql
import pandas as pd


HOST = 'database_host_here'
DATABASE = 'database_name_here'
USER = 'mysql_username_here'
PASSWORD = 'your_password_here'
DESTINATION = 'destination_directory_here'


def ensure_dirs(path):
    if not os.path.exists(path):
        os.makedirs(path)


def table_names_query():
    return """SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES
              WHERE TABLE_SCHEMA = '{}'""".format(DATABASE)


def column_names_query(table_name):
    return """SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS
              WHERE TABLE_SCHEMA = '{0}'
              AND TABLE_NAME = '{1}'""".format(DATABASE, table_name)


def column_names(cursor, table_name):
    cursor.execute(column_names_query(table_name))
    return [tup[0] for tup in list(cursor.fetchall())]


def table_names(cursor):
    cursor.execute(table_names_query())
    return [tup[0] for tup in list(cursor.fetchall())]


def chunks_to_csv(chunks, filepath):
    header = True
    for chunk in chunks:
        chunk.to_csv(filepath, mode='a', header=header,
                     index=False, encoding='utf-8')
        header = False


def main():
    ensure_dirs(DESTINATION)
    conn = pymysql.connect(host=HOST, user=USER, passwd=PASSWORD, db=DATABASE)
    cursor = conn.cursor()
    query = 'SELECT * FROM {table_name}'
    for table_name in table_names(cursor):
        chunks = pd.read_sql(query.format(table_name=table_name), conn,
                             columns=column_names(cursor, table_name),
                             chunksize=1000)
        filepath = os.path.join(DESTINATION, ''.join([table_name, '.csv']))
        if not os.path.exists(filepath):
            chunks_to_csv(chunks, filepath)
    cursor.close()
    conn.close()


if __name__ == "__main__":
    main()