ajib6ept
3/22/2014 - 10:41 AM

находим бесплатные public анонимные прокси с помощью Python

находим бесплатные public анонимные прокси с помощью Python

# -*- coding: UTF-8 -*-

import logging, threading, Queue, traceback, re, random
from grab import Grab

# Number of worker threads that main() starts to check proxies in parallel.
TH = 10

class MyClass(threading.Thread):
    """Worker thread that checks proxies taken from a shared queue.

    For every proxy string pulled from ``queue`` the thread fetches
    http://www.myip.ru/ru-RU/index.php through that proxy and compares the
    text of the selected table cell with ``my_real_ip``.  When the two
    differ, the proxy is printed, appended to ``good_prox`` and appended to
    ``file.txt``.

    Args:
        queue: a ``Queue.Queue`` holding proxy strings to check.
        my_real_ip: the cell text obtained without a proxy.  Optional here
            for backward compatibility; callers may also assign the
            ``my_real_ip`` attribute after construction, before ``start()``.
    """

    # Shared by all instances: serialises console output and appends to
    # 'file.txt' so lines written by different threads cannot interleave.
    _file_lock = threading.Lock()

    def __init__(self, queue, my_real_ip=None):
        threading.Thread.__init__(self)
        self.queue = queue
        self.good_prox = []
        # Always defined, so a forgotten assignment is reported explicitly
        # by worker() instead of surfacing as a swallowed AttributeError.
        self.my_real_ip = my_real_ip

    def run(self):
        """Drain the queue, checking each proxy; exit once the queue is empty."""
        while True:
            try:
                item = self.queue.get_nowait()
            except Queue.Empty:
                break
            try:
                self.worker(item)
            except Exception as exc:
                # Failing proxies are expected; keep going, but leave a trace
                # so that real programming errors are not completely hidden.
                logging.debug('proxy %s rejected: %r', item, exc)
            finally:
                # Must run for every item taken, otherwise queue.join() in
                # the caller would block forever.
                self.queue.task_done()

    def worker(self, prox):
        """Check one proxy and record it when the reported address differs.

        Args:
            prox: proxy string passed to Grab as ``proxy`` (type 'http').

        Raises:
            ValueError: if ``my_real_ip`` has not been set.
            Exception: whatever Grab raises on network or parsing failure.
        """
        if self.my_real_ip is None:
            raise ValueError('my_real_ip must be set before checking proxies')
        g = Grab()
        g.setup(proxy=prox, proxy_type='http')
        g.go('http://www.myip.ru/ru-RU/index.php')
        my_ip = g.doc.select('//tr/td[@bgcolor="white"]').text()
        if self.my_real_ip != my_ip:
            self.good_prox.append(prox)
            with self._file_lock:
                # Single-argument form prints the same text as the original
                # two-item print statement (note the double space).
                print('good proxy =>  %s' % prox)
                with open('file.txt', 'a') as f:
                    f.write(prox + '\n')

def listmerge(lstlst):
    """Concatenate the iterables contained in ``lstlst`` into one new list.

    Only one level of nesting is flattened; element order is preserved.
    """
    return [element for chunk in lstlst for element in chunk]

# Matches "a.b.c.d:port".  The dots are escaped so that only literal dots are
# accepted between the octets (an unescaped '.' would match any character,
# including spaces and letters).
_PROXY_RE = re.compile(
    r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]{1,5}')


def main():
    """Collect proxy candidates from the web and print those that pass the check.

    Steps:
      1. Fetch http://www.myip.ru/ru-RU/index.php directly and remember the
         text of the selected table cell as the reference value.
      2. Pick one random ``ip:port`` string from http://fineproxy.org/ and
         search for it on search.qip.ru.
      3. Visit up to 11 result links and extract every ``ip:port`` string.
      4. Check the de-duplicated candidates with ``TH`` ``MyClass`` threads
         and print the de-duplicated list of accepted proxies.

    Returns early (after logging an error) when the seed page contains no
    ``ip:port`` strings.
    """
    q = Queue.Queue()
    g = Grab()

    g.go('http://www.myip.ru/ru-RU/index.php')
    my_real_ip = g.doc.select('//tr/td[@bgcolor="white"]').text()

    g.go('http://fineproxy.org/')
    seed_proxies = _PROXY_RE.findall(g.response.body)
    if not seed_proxies:
        # random.choice() would raise IndexError on an empty list.
        logging.error('no proxies found on http://fineproxy.org/')
        return
    # Percent-encode the colon so the proxy can be used as a query value.
    first_prox = random.choice(seed_proxies).replace(':', '%3A')

    first_search = 'http://search.qip.ru/search?query=' + first_prox
    g.go(first_search)

    prox_urls = []
    for num in range(11):
        try:
            prox_url = g.doc.select('//article')[num].select('.//a').attr('href').encode('utf-8')
        except Exception as exc:
            # Fewer than 11 results, or a result without a link: skip it.
            logging.debug('search result %d skipped: %r', num, exc)
        else:
            print(prox_url)
            prox_urls.append(prox_url)

    candidates = set()
    for prox_url in prox_urls:
        try:
            g.go(prox_url)
            candidates.update(_PROXY_RE.findall(g.response.body))
        except Exception as exc:
            # An unreachable result page must not abort the whole run.
            logging.debug('page %s skipped: %r', prox_url, exc)

    my_prox = list(candidates)
    print(len(my_prox))

    # Fill the queue completely before starting the workers: they exit as
    # soon as they find the queue empty.
    for prox in my_prox:
        q.put(prox)

    workers = []
    for _ in range(TH):
        t = MyClass(q)
        t.my_real_ip = my_real_ip
        t.start()
        workers.append(t)
    q.join()

    good_prox = list(set(listmerge([t.good_prox for t in workers])))

    print(good_prox)



# Script entry point: configure the root logger at DEBUG level, then run the
# proxy search. Nothing is executed when the module is merely imported.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    main()