#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, requests, codecs
from bs4 import BeautifulSoup
def scraping(url, timeout=30):
    """Fetch *url* and return its basic SEO metadata.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A three-item list ``[title, description, keywords]``. An item is
        the empty string when the page lacks the corresponding ``<title>``
        or ``<meta name=...>`` tag, or when the meta tag has no ``content``
        attribute.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    # Without a timeout a single unresponsive host would block forever.
    response = requests.get(url, timeout=timeout)

    # Pass the raw bytes to BeautifulSoup and let it detect the charset.
    # Re-encoding ``response.text`` with ``response.encoding`` fails when
    # requests could not determine an encoding (``encoding`` is None).
    soup = BeautifulSoup(response.content, 'lxml')

    # Fall back to searching the whole document if there is no <head>.
    header = soup.find('head')
    if header is None:
        header = soup

    title_tag = header.find('title')
    title = title_tag.text if title_tag is not None else ''

    meta_contents = []
    for meta_name in ('description', 'keywords'):
        meta_tag = header.find('meta', attrs={'name': meta_name})
        if meta_tag is None:
            meta_contents.append('')
        else:
            # A <meta> tag may be present but lack a content attribute.
            meta_contents.append(meta_tag.get('content', ''))

    description_content, keywords_content = meta_contents
    return [title, description_content, keywords_content]
if __name__ == '__main__':
    # Script entry point: read URLs from url_list.txt (one per line),
    # scrape each page, and write one tab-separated line per URL to
    # seo_text_out.txt, encoded as UTF-8.

    # The with-block closes the input file; blank lines are skipped so an
    # empty URL is never passed to scraping().
    with open('url_list.txt') as url_file:
        urls = [line.strip() for line in url_file if line.strip()]

    rows = []
    for url in urls:
        # Collapse runs of whitespace inside each field: an embedded tab or
        # newline would otherwise break the one-line-per-URL,
        # tab-separated layout of the output file.
        fields = [" ".join(field.split()) for field in scraping(url)]
        rows.append("\t".join(fields))

    # The output file is opened only after all pages have been scraped and
    # is closed even if the write fails.
    with codecs.open('seo_text_out.txt', 'w', 'utf-8') as out_file:
        out_file.write("\n".join(rows))