LiamBao
6/21/2017 - 6:52 AM

normal.py

normal.py

rownodes=data.find_all('div',id=re.compile('post_(\d+)'))

content = node[0].xpath('string(.)').strip()

node=rownode.xpath('.//p[@class="post-timestamp"]/a[contains(@name,"floor_(\d+)")]')

floor=re.search("write\('(.*)'\)",floor).group(1)

    node = rownode.xpath('.//div[@class="post_msg replyBody"]/div//text() | .//div[@class="post_msg replyBody"]/font//text()')


def parse_item(self, response):
    """Scrape the id, name and description cells of an item page.

    Logs the page URL, then fills a fresh ``scrapy.Item`` from the text of
    the ``<td>`` cells with ids ``item_id``, ``item_name`` and
    ``item_description``.

    Args:
        response: the page response; must expose ``url`` and ``xpath()``.

    Returns:
        The populated item.
    """
    # NOTE(review): a bare scrapy.Item() declares no fields, so the key
    # assignments below may be rejected -- confirm against the Scrapy version.
    page_url = response.url
    self.log('Hi, this is an item page! %s' % page_url)

    scraped = scrapy.Item()

    id_cell = response.xpath('//td[@id="item_id"]/text()')
    scraped['id'] = id_cell.re(r'ID: (\d+)')

    name_cell = response.xpath('//td[@id="item_name"]/text()')
    scraped['name'] = name_cell.extract()

    description_cell = response.xpath('//td[@id="item_description"]/text()')
    scraped['description'] = description_cell.extract()

    return scraped


def parsePosterName(rownode):
    """Return the poster name found in a post row.

    The author link (``div.authi a.xw1``) is tried first, then the fallback
    cell ``td.pls div.pi``. If the node text contains the marker
    "该用户已被删除" ("this user has been deleted"), the text preceding the
    marker is returned; otherwise the node's ``.string`` is returned.

    Args:
        rownode: element exposing ``select(css)`` that returns a sequence of
            nodes with ``get_text()`` and ``.string``.

    Raises:
        NameError: if neither selector matches any node.
    """
    node = rownode.select('div.authi a.xw1')
    if not node:
        node = rownode.select('td.pls div.pi')
    if not node:
        raise NameError('Can not parse PosterName!')

    # Search once and reuse the match instead of extracting the text and
    # running the regex a second time.
    deleted = re.search('(.*)该用户已被删除', node[0].get_text())
    if deleted:
        return deleted.group(1)
    return node[0].string


def getExcel(data):
    """Write scraped rows to ``<theSiteid>_<timestamp>.xlsx``.

    Row 0 holds the column headings; record ``k`` of ``data`` is written to
    row ``k + 1``. The heading row is written even when ``data`` is empty, so
    the output always has a labelled sheet.

    Args:
        data: sequence of records, each indexable with at least
            ``len(title)`` entries (one value per heading).

    Relies on the module-level ``theSiteid`` and on ``wx``
    (presumably an alias of xlsxwriter -- confirm against the imports).
    """
    title = ['siteid', 'subject', 'content', 'dateOfPost', 'floor', 'posterName',
             'posterURL', 'posterID', 'threadURL', 'isTopicPost', 'pageNum']

    filename = str(theSiteid) + '_' + time.strftime('%Y-%m-%d %H-%M-%S', time.localtime())
    workbook = wx.Workbook(filename + '.xlsx')
    worksheet = workbook.add_worksheet()

    # Header first and unconditionally: previously it was only emitted from
    # inside the data loop, so an empty ``data`` produced no header at all.
    for col, heading in enumerate(title):
        worksheet.write(0, col, heading)

    for row, record in enumerate(data, start=1):
        for col in range(len(title)):
            worksheet.write(row, col, record[col])
    workbook.close()




 文件夹框打开方式
import win32ui
 
# Ask the user for an input file through a native dialog and read its lines.
clr = Color()
clr.print_green_text('Enter to Open File')
dlg = win32ui.CreateFileDialog(1)  # 1 -> show an "open file" dialog
dlg.SetOFNInitialDir('C:/')  # directory initially shown by the dialog
dlg.DoModal()
filename = dlg.GetPathName()
clr.print_green_text('Open File or directory: ' + filename)
# Nothing selected: exit quietly.
if not filename:
    sys.exit(0)
# The context manager closes the handle even if reading fails part-way.
with open(filename, 'rb') as f:
    mainlists = f.readlines()


线程使用
#coding:utf-8
import threading
import time

        # Split mainlists into at most 10 chunks and process each chunk on
        # its own thread via threadMain.
        # max(1, ...) keeps the range() step non-zero when mainlists is empty
        # (a zero step raises ValueError).
        threadnum = max(1, math.ceil(len(mainlists) / 10))
        list_thread = [mainlists[i:i + threadnum]
                       for i in range(0, len(mainlists), threadnum)]

        threads = [threading.Thread(target=threadMain, args=(chunk,))
                   for chunk in list_thread]

        print('============== start in threading ==============')
        for t in threads:
            # Attribute form; setDaemon() is deprecated.
            t.daemon = True
            t.start()

        # Wait for every worker before continuing.
        for t in threads:
            t.join()




def action(arg):
    """Thread target for the demo: report the thread name and argument.

    Sleeps one second, prints the current thread's name and ``arg``, then
    sleeps one more second.

    Args:
        arg: value interpolated into the second message with ``%s``.
    """
    time.sleep(1)
    # print() with a single pre-formatted string behaves the same on
    # Python 2 and Python 3; the statement form is a SyntaxError on 3.
    print('sub thread start!the thread name is:%s    ' % threading.current_thread().name)
    print('the arg is:%s   ' % arg)
    time.sleep(1)

thread_list = []  # worker threads, created first and started afterwards
# range() instead of xrange(): works on both Python 2 and Python 3.
for i in range(4):
    t = threading.Thread(target=action, args=(i,))
    t.daemon = True  # attribute form; setDaemon() is deprecated
    thread_list.append(t)

for t in thread_list:
    t.start()

# Block until every worker has finished.
for t in thread_list:
    t.join()







excel读取

    # Open the workbook, pick the sheet named 'Thread' and keep the values of
    # column index 3 in ``wr`` (one entry per row of that column).
    wr = xlrd.open_workbook(filename).sheet_by_name('Thread').col_values(3)
    thread_total = len(wr)
    print("ALL "+str(thread_total)+" threads!")




excel 插入数据

import xlwt
import xlrd
from xlutils.copy import copy
 
# Step 1: create a workbook whose first row is a bold red header, and save it.
styleBoldRed = xlwt.easyxf('font: color-index red, bold on')
headerStyle = styleBoldRed
wb = xlwt.Workbook()
ws = wb.add_sheet(gConst['xls']['sheetName'])
for col, caption in enumerate(("Header", "CatalogNumber", "PartNumber")):
    ws.write(0, col, caption, headerStyle)
wb.save(gConst['xls']['fileName'])

# Step 2: re-open the saved file and append a data row.
# copy() is given the xlrd Book object here, not the file path; the path-based
# attempts were abandoned in the original notes.
oldWb = xlrd.open_workbook(gConst['xls']['fileName'], formatting_info=True)
print(oldWb)  # <xlrd.book.Book object at 0x000000000315C940>
newWb = copy(oldWb)
print(newWb)  # <xlwt.Workbook.Workbook object at 0x000000000315F470>
newWs = newWb.get_sheet(0)
for col, value in enumerate(("value1", "value2", "value3")):
    newWs.write(1, col, value)
print("write new values ok")
newWb.save(gConst['xls']['fileName'])
print("save with same name ok")



MYSQL操作

class SelectMySQL(object):
    """Run a SELECT against MySQL and collect the first column of each row.

    Connection settings (``host``, ``port``, ``user``, ``passwd``, ``db``)
    are read from names defined outside this class.
    """

    def select_data(self, sql):
        """Execute ``sql`` and return the first column of every fetched row.

        Errors are reported on stdout and do not propagate; whatever was
        collected before the failure (possibly nothing) is returned.

        Args:
            sql: complete SQL statement, executed as-is. It is not
                parameterized here, so callers must not build it from
                untrusted input.

        Returns:
            list of first-column values, ``[]`` on failure.
        """
        result = []
        # Pre-bind so the cleanup below is safe when connecting itself fails;
        # otherwise ``finally`` would raise on the unbound names and hide the
        # original error.
        conn = None
        cur = None
        try:
            conn = MySQLdb.connect(host=host,
                                   port=port,
                                   user=user,
                                   passwd=passwd,
                                   db=db,
                                   charset='utf8')
            cur = conn.cursor()
            cur.execute(sql)
            result.extend(rec[0] for rec in cur.fetchall())
        except Exception as e:
            # str(e): concatenating the exception object itself is a TypeError.
            print('Error msg: ' + str(e))
        finally:
            if cur is not None:
                cur.close()
            if conn is not None:
                conn.close()

        return result

    def get_result(self, sql, filename):
        """Run ``sql``, write one result per line to ``filename``, return them.

        Args:
            sql: statement forwarded to :meth:`select_data`.
            filename: path of the text file to (over)write.

        Returns:
            The list produced by :meth:`select_data`.
        """
        print(sql)
        results = self.select_data(sql)
        print('The amount of datas: %d' % (len(results)))
        with open(filename, 'w') as f:
            f.writelines(str(result) + '\n' for result in results)
        print('Data write is over!')
        return results
 



tutorial

>>> # flatten a list using a listcomp with two 'for'
>>> vec = [[1,2,3], [4,5,6], [7,8,9]]
>>> [num for elem in vec for num in elem]
[1, 2, 3, 4, 5, 6, 7, 8, 9]

------
>>> from math import pi
>>> [str(round(pi, i)) for i in range(1, 6)]
['3.1', '3.14', '3.142', '3.1416', '3.14159']

------
# Transpose a 3x4 matrix: print its column count, then its columns as rows.
matrix = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
]

column_count = len(matrix[0])
print(column_count)

transposed = []
for col in range(column_count):
    transposed.append([row[col] for row in matrix])
print(transposed)

>>4
>>[[1, 5, 9], [2, 6, 10], [3, 7, 11], [4, 8, 12]]





继承顺序问题
class Init(object):
    """Base class of the MRO demo: stores the constructor argument in ``val``."""
    def __init__(self, value):
        # Starting value that the subclasses' __init__ methods then modify.
        self.val = value


class Add2(Init):
    """Adds 2 to ``val`` after the rest of the __init__ chain has run."""
    def __init__(self, val):
        # super(Add2, self) resolves against the MRO of type(self), so the
        # next __init__ is whichever class follows Add2 in that MRO.
        super(Add2, self).__init__(val)
        self.val += 2


class Mul5(Init):
    """Multiplies ``val`` by 5 after the rest of the __init__ chain has run."""
    def __init__(self, val):
        # super(Mul5, self) resolves against the MRO of type(self), so the
        # next __init__ is whichever class follows Mul5 in that MRO.
        super(Mul5, self).__init__(val)
        self.val *= 5


class Pro(Mul5, Add2):
    # MRO: Pro -> Mul5 -> Add2 -> Init -> object.  Mul5.__init__ therefore
    # delegates to Add2.__init__ (not straight to Init), so Pro(5).val is
    # (5 + 2) * 5 == 35.
    pass


class Incr(Pro):
    """Runs the ``__init__`` chain that follows Pro in the MRO, then adds 1."""
    # One-argument super() creates an *unbound* super object.  It is a
    # descriptor: reading it through an instance (self.csup) binds it,
    # giving the equivalent of super(Pro, self).
    csup = super(Pro)

    def __init__(self, val):
        # Lookup starts after Pro in the MRO, i.e. at Mul5.__init__.
        self.csup.__init__(val)
        self.val += 1


# Chain for Incr(5): Init sets 5, Add2 -> 7, Mul5 -> 35, Incr -> 36.
p = Incr(5)
print(p.val)  # prints 36