#!/usr/bin/env python
from BeautifulSoup import BeautifulStoneSoup
import sys, urllib2, re, pprint, codecs
import inspect, doctest
from pprint import pprint
# Wrap stdout in a UTF-8 writer so unicode strings print correctly even
# when output is piped (Python 2 sets sys.stdout.encoding to None there).
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
def main():
    """Read tab-separated lines from ./list.txt and print bookmark stats
    for the Hatena user id found in the first column of each line."""
    path = './list.txt'
    # `with` guarantees the file is closed (the original leaked the handle).
    with open(path, 'r') as fh:
        for line in fh:
            line = line.rstrip('\n')
            if not line:
                # Skip blank lines. The original tested the raw line and
                # used `else: break`, but lines yielded by file iteration
                # always contain '\n', so that branch was unreachable and
                # a blank line produced a bogus '\n' id instead.
                continue
            user_id = line.split('\t')[0]
            print_date(user_id)
def print_date(id):
url = 'http://b.hatena.ne.jp/' + id + '/rss'
try:
get_xml = urllib2.urlopen(url).read()
except:
get_xml = ''
soup = BeautifulStoneSoup(get_xml, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
reg = re.compile(ur"""<opensearch:totalResults>(.*?)</opensearch:totalResults>""")
reg.search(str(get_xml))
try:
s = reg.search(get_xml)
total = s.group(1)
title = soup.channel('title')[0].string
date = soup('dc:date')
# import prettyPrint;prettyPrint.dumpObj(total)
# pprint(inspect.getmembers(total),indent=2)
# print id + '::' + '::' + total + '::' + date[0].string
print id + "\t" + title + "\t" + total + "\t" + date[0].string
except:
pass
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()