guneysus
3/12/2016 - 11:55 PM

Compare how long socket, urllib, urllib2, urllib3 and requests take to get the URL a short link redirects to

Compare how long socket, urllib, urllib2, urllib3 and requests take to get the URL a short link redirects to

#!/usr/bin/env python
# coding:utf-8
import multiprocessing
import requests
import urllib
import urllib2
import urllib3
from functools import wraps
from threading import Thread
from time import time, sleep


def timing(f, N=3):
    """Decorate ``f`` so that every call runs it ``N`` times and reports timings.

    The wrapper calls ``f`` with the caller's arguments ``N`` times in a row,
    measures each run with ``time()``, prints the average, minimum and maximum
    duration, sleeps for half a second and returns the result of the last run.

    When applied as a bare ``@timing`` decorator, ``N`` keeps its default.

    :param f: callable to benchmark.
    :param N: number of runs performed per call of the wrapper.
    :return: the wrapping function, carrying ``f``'s metadata via ``wraps``.
    """
    @wraps(f)
    def wrap(*args, **kw):
        times = []
        result = None
        # ``range`` behaves the same as ``xrange`` for this small count and
        # also exists on Python 3.
        for _ in range(N):
            ts = time()
            result = f(*args, **kw)
            times.append(time() - ts)
        avg_time = sum(times) / N

        # A single parenthesised argument prints identically whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print('func:%r args:[%r, %r]  Avg %2.2f sec | Min: %2.2f | Max: %2.2f ' %
              (f.__name__, args, kw, avg_time, min(times), max(times)))
        sleep(0.5)
        return result

    return wrap


@timing
def get_urllib(url):
    """Open ``url`` with ``urllib.urlopen`` and return the response's ``url``.

    :param url: address to open.
    :return: the ``url`` attribute of the response object.
    """
    response = urllib.urlopen(url)
    try:
        return response.url
    finally:
        # Close explicitly instead of leaving the connection to the garbage
        # collector; the benchmark opens one response per run.
        response.close()


@timing
def get_urllib2(url):
    """Open ``url`` with ``urllib2.urlopen`` and return the response's URL.

    If ``urlopen`` raises ``urllib2.HTTPError``, the URL is taken from the
    error's underlying response (``e.fp``) instead.

    :param url: address to open.
    :return: the URL reported by ``geturl()``.
    """
    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        return e.fp.geturl()
    try:
        # Return the URL string, not the response object, so both branches
        # (and the sibling benchmarks) yield the same kind of value.
        return response.geturl()
    finally:
        response.close()


@timing
def get_urllib3(url):
    """Send a HEAD request through urllib3 and return its ``location`` header.

    A new ``PoolManager`` is created on every call. The request is made with
    ``preload_content=False`` and ``redirect=0``, and the ``location`` header
    of the response that comes back is returned.

    :param url: address to request.
    :return: value of the response's ``location`` header.
    """
    pool = urllib3.PoolManager()
    # redirect=0 is falsy: the response is handed back instead of being
    # followed, so its own headers can be inspected.
    response = pool.urlopen('HEAD', url, preload_content=False, redirect=0)
    location = response.headers['location']
    return location


@timing
def get_by_curl():
    """
    Placeholder for a curl-based variant; it has no implementation yet.

    The command noted for it is:

        curl -Ls -o /dev/null -w %{url_effective}  https://t.co/1c1aqGnBA8

    :raises NotImplementedError: always.
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception; calling
    # it fails with a TypeError. ``NotImplementedError`` is the exception.
    raise NotImplementedError()


@timing
def get_requests(url):
    """Send a HEAD request with ``requests`` and return its ``location`` header.

    Redirects are not followed (``allow_redirects=False``), so the header is
    read from the first response.

    :param url: address to request.
    :return: value of the response's ``location`` header.
    """
    response = requests.request('HEAD', url, allow_redirects=False)
    return response.headers['location']


@timing
def get_by_socket(url, method='HEAD'):
    """Request ``url`` over a raw TCP socket and return its Location header.

    Connects to the URL's network location on port 80, sends a single request
    line containing the full URL, reads up to 512 bytes of the reply and
    returns the value of the ``Location`` header found in them.

    :param url: address to request.
    :param method: HTTP method placed in the request line.
    :return: value of the ``Location`` header.
    :raises IndexError: if the bytes read contain no ``Location`` header.
    """
    import socket
    import urlparse

    parsed_url = urlparse.urlparse(url)
    # NOTE(review): the request uses bare "\n" terminators and no Host header.
    # It is left byte-for-byte unchanged so the benchmark keeps measuring the
    # same exchange; confirm before pointing this at other servers.
    request = "%s %s://%s%s HTTP/1.1\n\n" % (
        method, parsed_url.scheme, parsed_url.netloc, parsed_url.path)

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((parsed_url.netloc, 80))
        s.sendall(request)
        # recv() is the call for a connected TCP socket; the peer address
        # that recvfrom() also returns was never used.
        data = s.recv(512)
    finally:
        # Always release the socket, also when connect/send/recv fail.
        s.close()

    # Header names are case-insensitive, so match "Location" in any casing.
    for line in data.splitlines():
        name, sep, value = line.partition(':')
        if sep and name.strip().lower() == 'location':
            return value.strip()
    raise IndexError('no Location header in the first 512 bytes of the response')


if __name__ == '__main__':
    from multiprocessing import Process
    url = "https://t.co/1c1aqGnBA8"
    # Alternative kept for reference: run every benchmark in its own process.
    # t = [
    #     Process(target=get_by_socket, args=[url]),
    #     Process(target=get_urllib, args=[url]),
    #     Process(target=get_urllib2, args=[url]),
    #     Process(target=get_urllib3, args=[url]),
    #     Process(target=get_requests, args=[url]),
    # ]
    # map(Process.start, t)
    # map(Process.join, t)

    # Benchmarks in execution order, each with the keyword arguments it is
    # called with (they show up in the printed report).
    benchmarks = (
        (get_by_socket, {'method': 'HEAD'}),
        (get_urllib, {}),
        (get_urllib2, {}),
        (get_urllib3, {}),
        (get_requests, {}),
    )
    for benchmark, options in benchmarks:
        benchmark(url, **options)
/usr/bin/python2.7 /home/ahmed/PycharmProjects/socket/main.py
func:'get_by_socket' args:[('https://t.co/1c1aqGnBA8',), {'method': 'HEAD'}]  Avg 2.0 sec | Min: 1.7 | Max: 2.3 
func:'get_urllib' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 1.6 sec | Min: 1.2 | Max: 2.8 
func:'get_urllib2' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 1.3 sec | Min: 1.2 | Max: 1.4 
func:'get_urllib3' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 0.9 sec | Min: 0.8 | Max: 0.9 
func:'get_requests' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 0.9 sec | Min: 0.9 | Max: 0.9 


/usr/bin/python2.7 /home/ahmed/PycharmProjects/socket/main.py
func:'get_by_socket' args:[('https://t.co/1c1aqGnBA8',), {'method': 'HEAD'}]  Avg 0.4 sec | Min: 0.4 | Max: 0.5 
func:'get_urllib' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 1.3 sec | Min: 1.2 | Max: 1.4 
func:'get_urllib2' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 1.3 sec | Min: 1.2 | Max: 1.6 
func:'get_urllib3' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 0.9 sec | Min: 0.8 | Max: 1.2 
func:'get_requests' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 0.9 sec | Min: 0.8 | Max: 1.1

/usr/bin/python2.7 /home/ahmed/PycharmProjects/socket/main.py
func:'get_by_socket' args:[('https://t.co/1c1aqGnBA8',), {'method': 'HEAD'}]  Avg 0.45 sec | Min: 0.41 | Max: 0.61 
func:'get_urllib' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 1.35 sec | Min: 1.25 | Max: 1.48 
func:'get_urllib2' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 1.38 sec | Min: 1.20 | Max: 1.66 
func:'get_urllib3' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 0.86 sec | Min: 0.80 | Max: 1.00 
func:'get_requests' args:[('https://t.co/1c1aqGnBA8',), {}]  Avg 0.88 sec | Min: 0.83 | Max: 1.00