xx-li
6/16/2018 - 2:03 AM

Scrapy: Writing to the Database Asynchronously

Using a pipeline in Scrapy to save scraped data asynchronously to a MySQL database

from twisted.enterprise import adbapi

import MySQLdb
import MySQLdb.cursors


class MysqlTwistedPipline(object):
    # Write items to MySQL asynchronously through a Twisted adbapi connection pool
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        # Read the MySQL connection parameters from settings.py and build the pool
        dbparms = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )

        dbpool = adbapi.ConnectionPool("MySQLdb", **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        # Use Twisted to run the MySQL insert asynchronously
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        # Handle exceptions raised by the asynchronous insert
        print(failure)

    def do_insert(self, cursor, item):
        # Perform the actual insert
        insert_sql = """
            INSERT INTO jobbole_article (title, url, create_date, fav_nums)
            VALUES (%s, %s, %s, %s)
        """
        cursor.execute(insert_sql, (item["title"], item["url"], item["create_date"], item["fav_nums"]))
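
For this pipeline to run, it must be enabled in settings.py, and the MYSQL_* keys read in from_settings must be defined there as well. A minimal sketch, assuming the class lives in a module importable as myproject.pipelines (the module path and connection values are placeholders to adjust for your project):

# settings.py (sketch; adjust module path and credentials)
ITEM_PIPELINES = {
    "myproject.pipelines.MysqlTwistedPipline": 300,
}

MYSQL_HOST = "127.0.0.1"
MYSQL_DBNAME = "article_spider"
MYSQL_USER = "root"
MYSQL_PASSWD = ""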
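
The insert also assumes an item carrying title, url, create_date and fav_nums fields, matching the columns of jobbole_article. A corresponding definition in items.py would look roughly like this (the class name is hypothetical):

import scrapy


class JobBoleArticleItem(scrapy.Item):
    # Fields match the columns inserted into jobbole_article
    title = scrapy.Field()
    url = scrapy.Field()
    create_date = scrapy.Field()
    fav_nums = scrapy.Field()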