#!/usr/bin/python
# -*- encoding:utf-8 -*-
import cookielib, urllib2, urllib
import re
def get_postFormID(opener):
    """Fetch the new-blog form page and extract its ``postFormId`` token.

    Each line that is examined is also written to ``temp.txt`` in the
    current directory as a debugging dump; writing stops at the first line
    that contains the token.

    Args:
        opener: object whose ``open(url)`` returns a response providing
            ``readlines()``.

    Returns:
        The token exactly as it appears in the page (a string of digits,
        optionally preceded by ``-``), or the integer ``0`` when no line
        contains it.
    """
    form_url = 'http://blog.renren.com/blog/0/addBlog'
    # The ``value`` and ``id`` attributes can appear in either order, so
    # both orderings are tried on every line.
    patterns = (
        re.compile(r'.+value="(-?\d+)".+id="postFormId".+'),
        re.compile(r'.+id="postFormId".+value="(-?\d+)".+'),
    )
    lines = opener.open(form_url).readlines()
    # ``with`` guarantees the dump file is flushed and closed on every exit
    # path, including the early returns below.
    with open('temp.txt', 'w+') as temp:
        for line in lines:
            temp.write(line)
            for pattern in patterns:
                found = pattern.findall(line)
                if found:
                    print(found)
                    return found[0]
    print("not found!")
    return 0
# Log in to Renren (Xiaonei) and obtain the session cookies.
def login(email, password):
    """Post the credentials to the Renren login URL and return the opener.

    The returned opener has a cookie jar attached, so requests made through
    it afterwards send back whatever cookies the login response set.

    Args:
        email: account e-mail, sent as the ``email`` form field.
        password: account password, sent as the ``password`` form field.

    Returns:
        The ``urllib2`` opener that was used for the login request.
    """
    login_url = 'http://www.renren.com/Login.do'
    credentials = (('email', email), ('password', password))

    cookie_jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
    opener = urllib2.build_opener(cookie_handler)
    opener.addheaders = [
        ("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.1; Windows NT 5.1; SV1)"),
    ]

    # The response is not inspected; the request is made so that the
    # cookies it sets are stored in the jar attached to the opener.
    form_data = urllib.urlencode(credentials)
    response = opener.open(login_url, form_data)
    return opener
def save_total_page(opener, url, name="totalpage.html"):
    """Download ``url`` through ``opener`` and save the body to ``name``.

    Args:
        opener: object whose ``open(url)`` returns a response providing
            ``read()``.
        url: address of the page to fetch.
        name: path of the output file; it is created or overwritten.

    Returns:
        None.
    """
    # Fetch first so that a failed download does not leave behind an empty
    # or truncated output file.
    html = opener.open(url).read()
    # ``with`` closes the file even if the write fails.
    with open(name, "w+") as fd:
        fd.write(html)
def transcode(str):
    """Re-encode a byte string from the system's native encoding to UTF-8.

    The input is decoded with the ``mbcs`` codec. That codec exists only on
    Windows, so on other platforms the locale's preferred encoding is used
    instead of failing with ``LookupError``.

    Args:
        str: byte string to convert. The name shadows the builtin but is
            kept so that existing keyword callers continue to work.

    Returns:
        The same text encoded as UTF-8 bytes.

    Raises:
        UnicodeDecodeError: if the input is not valid in the encoding used.
    """
    import locale

    try:
        text = str.decode('mbcs')
    except LookupError:
        # 'mbcs' is not registered on this platform.
        text = str.decode(locale.getpreferredencoding() or 'utf8')
    return text.encode('utf8')
# IMPORTANT: replace these with your own login e-mail and password.
email = 'xxxx@xxx.xxx'
password = 'xxxxx'

# Form fields sent with the new-entry request; the script's main section
# adds 'postFormId', 'title' and 'body' before posting.
body = {
    'relative_optype': 'publisher',
    'blogControl': '99',
    'categoryId': '0',
    'editBlogControl': '99',
    'isVip': 'false',
}

# Address that the new blog entry is POSTed to.
url_post = 'http://blog.renren.com/NewEntry.do'
if __name__ == "__main__":
    # Log in, read the form token, snapshot the blog home page, publish one
    # entry, then snapshot the home page again for comparison.
    opener = login(email, password)
    postFormId = get_postFormID(opener)
    # get_postFormID returns 0 when the token is missing from the form page.
    # Submitting without a real token is pointless, so stop with a message.
    if not postFormId:
        raise SystemExit("postFormId not found; blog entry was not submitted")
    save_total_page(opener, 'http://blog.renren.com/', name='blog_home_before.html')
    # Very important: per the original author, this value is random and is
    # bound to the session of each login.
    body['postFormId'] = postFormId
    body['title'] = transcode('标题是也')
    body['body'] = transcode('正文是也,啊哈哈哈哈哈哈~')
    # Submit the POST request that publishes the entry.
    req2 = opener.open(url_post, urllib.urlencode(body))
    save_total_page(opener, 'http://blog.renren.com/', name='blog_home_after.html')