# Scrapy pipeline: asynchronously save scraped items to MySQL via Twisted adbapi.
from twisted.enterprise import adbapi
import MySQLdb
import MySQLdb.cursors
class MysqlTwistedPipline(object):
# 异步写入数据库
def __init__(self, dbpool):
self.dbpool = dbpool
@classmethod
def from_settings(cls, setting):
dbparms = dict(
host = setting["MYSQL_HOST"],
db = setting["MYSQL_DBNAME"],
user = setting["MYSQL_USER"],
passwd = setting["MYSQL_PASSWD"],
charset = 'utf8',
cursorclass = MySQLdb.cursors.DictCursor,
use_unicode = True,
)
dbpool = adbapi.ConnectionPool("MySQLdb", **dbparms)
return cls(dbpool)
def process_item(self, item, spider):
# 使用twisted将mysql插入变成异步执行
query = self.dbpool.runInteraction(self.do_insert, item)
query.addErrback(self.handle_error, item, spider)
def handle_error(self, failure, item , spider):
# 处理异步插入的异常
print(failure)
def do_insert(self, cursor, item):
# 执行具体的插入
insert_sql = """
insert into jobbole_article(title, url ,create_date, fav_nums)
VALUES (%s, %s, %s, %s)
"""
cursor.execute(insert_sql, (item["title"], item["url"], item["create_date"], item["fav_nums"]))