ajib6ept
10/19/2019 - 12:10 PM

scrapy_cache

# Scrapy setting: dotted import path of the cache-policy class that the HTTP
# cache middleware should use. It points at the DummyPolicy class defined
# below.
# NOTE(review): presumably this line belongs in the project's settings module
# and the class in mypars/middlewares.py -- confirm against the project layout.
HTTPCACHE_POLICY = 'mypars.middlewares.DummyPolicy'
class DummyPolicy(object):
    """HTTP cache policy that stores only pages with a specific ``<h1>`` text.

    Requests are cacheable unless their URL scheme is listed in
    ``HTTPCACHE_IGNORE_SCHEMES``. Responses are cacheable only when their
    status is not listed in ``HTTPCACHE_IGNORE_HTTP_CODES`` and the first
    ``<h1>`` text node of the page equals ``CACHE_HEADING``. Anything already
    in the cache is treated as fresh and valid.
    """

    # Text the first <h1> text node must equal for a response to be cached.
    # Subclasses may override it to cache a different page.
    CACHE_HEADING = 'A Light in the Attic'

    def __init__(self, settings):
        """Read the ignore lists from ``settings``.

        Args:
            settings: object exposing ``getlist(name)``; the values of
                ``HTTPCACHE_IGNORE_HTTP_CODES`` are converted to ``int``.
        """
        self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
        self.ignore_http_codes = [
            int(x) for x in settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES')]

    def should_cache_request(self, request):
        """Return True unless the request's URL scheme is in the ignore list."""
        return urlparse_cached(request).scheme not in self.ignore_schemes

    def should_cache_response(self, response, request):
        """Return True when ``response`` should be written to the cache.

        Args:
            response: the downloaded response; ``status`` and ``body`` are read.
            request: the originating request (unused).

        Returns:
            False when the status is in ``ignore_http_codes``; otherwise
            whether the first ``<h1>`` text node equals ``CACHE_HEADING``.
        """
        # This status check used to sit after an unconditional ``return`` and
        # never ran; do it first so ignored codes are rejected without
        # parsing the body at all.
        if response.status in self.ignore_http_codes:
            return False

        body = response.body
        # Only decompress bodies that carry the gzip magic number.
        # NOTE(review): assumes gunzip fails on bodies that are not gzip
        # data, which is why plain bodies are passed through -- confirm.
        if body.startswith(b'\x1f\x8b'):
            body = gunzip(body)

        heading = Selector(text=body).xpath('//h1/text()').get()
        return heading == self.CACHE_HEADING

    def is_cached_response_fresh(self, cachedresponse, request):
        """Always treat a cached response as fresh."""
        return True

    def is_cached_response_valid(self, cachedresponse, response, request):
        """Always treat a cached response as valid."""
        return True