guneysus
9/10/2016 - 8:27 PM

Python low-level (socket) HTTP request experiments

Python low-level (socket) HTTP request experiments

# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio

*.iml

## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties

Python low-level (socket) HTTP request experiments

#!/usr/bin/env python
# coding:utf-8
import urllib
import urllib2
import urllib3
from functools import wraps
from time import time, sleep

import requests


def timing(f=None, N=3, pause=0.5):
    """Decorate *f* so each call runs it ``N`` times and prints the timings.

    Usable bare (``@timing``) or with options (``@timing(N=5, pause=0)``).

    :param f: function to measure; ``None`` when the decorator is given
        options, in which case a decorator is returned instead of a wrapper.
    :param N: how many times the wrapped function is executed per call.
    :param pause: seconds to sleep after the report has been printed
        (presumably to space out consecutive measurements).
    :return: the wrapping function. It prints the average, minimum and
        maximum wall-clock time and returns the result of the last run.
    :raises ValueError: if ``N`` is smaller than 1 (no run, nothing to average).
    """
    if N < 1:
        raise ValueError('N must be at least 1, got %r' % (N,))
    if f is None:
        # Called as @timing(N=..., pause=...): hand back the real decorator.
        return lambda func: timing(func, N=N, pause=pause)

    @wraps(f)
    def wrap(*args, **kw):
        times = []
        result = None
        for _ in range(N):
            ts = time()
            result = f(*args, **kw)
            te = time()
            times.append(te - ts)
        avg_time = sum(times) / N

        # Parenthesised single argument: valid as a Python 2 print statement
        # and as a Python 3 print() call.
        print('func:%r args:[%r, %r]  Avg %2.2f sec | Min: %2.2f | Max: %2.2f ' %
              (f.__name__, args, kw, avg_time, min(times), max(times)))
        sleep(pause)
        return result

    return wrap


@timing
def get_urllib(url):
    """Open *url* with ``urllib`` and return the URL stored on the response.

    :param url: address to request.
    :return: the ``url`` attribute of the response object (expected to be
        the address reached after redirects -- confirm against urllib docs).
    """
    response = urllib.urlopen(url)
    try:
        return response.url
    finally:
        # Close explicitly instead of leaving the connection to the
        # garbage collector; the decorator calls this several times in a row.
        response.close()


@timing
def get_urllib2(url):
    """Open *url* with ``urllib2`` and return the URL of the response.

    The URL is returned on both the success and the HTTP-error path, so the
    result has the same type either way.

    :param url: address to request.
    :return: ``geturl()`` of the response, or of the error's underlying
        response object when the server answered with an HTTP error status.
    """
    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        # An HTTPError carries the server's response in ``fp``; treat it
        # like a normal response so both paths share the code below.
        response = e.fp
    try:
        return response.geturl()
    finally:
        response.close()


@timing
def get_urllib3(url):
    """Send a HEAD request with ``urllib3`` without following redirects.

    :param url: address to request.
    :return: value of the response's ``location`` header.
    """
    http = urllib3.PoolManager()
    r = http.urlopen('HEAD', url, preload_content=False, redirect=False)
    try:
        return r.headers['location']
    finally:
        # With preload_content=False the body is not read automatically, so
        # the connection must be handed back to the pool by hand.
        r.release_conn()


@timing
def get_by_curl():
    """
    Placeholder for resolving a URL by running curl; not implemented yet.

    Command this is meant to wrap:

    curl -Ls -o /dev/null -w %{url_effective}  https://t.co/1c1aqGnBA8

    :return: never returns normally.
    :raises NotImplementedError: always.
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception; calling
    # it raised a confusing TypeError. NotImplementedError is the exception.
    raise NotImplementedError('get_by_curl is not implemented')


@timing
def get_requests(url):
    """Send a HEAD request with ``requests`` without following redirects.

    :param url: address to request.
    :return: value of the response's ``location`` header.
    """
    response = requests.request('HEAD', url, allow_redirects=False)
    response_headers = response.headers
    return response_headers['location']


def _location_from_response(raw_response):
    """Extract the ``Location`` header value from a raw HTTP response.

    :param raw_response: response text as read from the socket (status line
        and headers, possibly followed by body data).
    :return: the header value with surrounding whitespace removed.
    :raises KeyError: if the header section has no ``Location`` header.
    """
    header_section = raw_response.split('\r\n\r\n', 1)[0]
    # The first line is the status line; the rest are "Name: value" pairs.
    for line in header_section.splitlines()[1:]:
        name, sep, value = line.partition(':')
        # Header names are case-insensitive, so compare in lower case.
        if sep and name.strip().lower() == 'location':
            return value.strip()
    raise KeyError('location')


@timing
def get_by_socket(url, method='HEAD'):
    """Send a hand-written HTTP/1.1 request over a TCP socket and return the
    ``Location`` header of the reply.

    The connection is plain TCP without TLS, to port 80 unless the URL names
    a port. The request line carries the full URL including its scheme.
    Written for Python 2 (``urlparse`` module, byte strings on the socket).

    :param url: address to request.
    :param method: HTTP method placed in the request line.
    :return: value of the ``Location`` response header.
    :raises ValueError: if *url* contains no host.
    :raises KeyError: if the response has no ``Location`` header.
    """
    import urlparse
    import socket

    max_header_bytes = 65536  # stop reading if headers never terminate

    parsed_url = urlparse.urlparse(url)
    host = parsed_url.hostname
    if not host:
        raise ValueError('URL has no host: %r' % (url,))
    # ``netloc`` may carry a port or credentials; connect() needs them split.
    explicit_port = parsed_url.port
    port = explicit_port or 80
    if explicit_port is None:
        host_header = host
    else:
        host_header = '%s:%d' % (host, explicit_port)

    target = parsed_url.path
    if parsed_url.query:
        target = '%s?%s' % (target, parsed_url.query)

    # HTTP/1.1 requires CRLF line endings and a Host header.
    request = '%s %s://%s%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n' % (
        method, parsed_url.scheme, parsed_url.netloc, target, host_header)

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        s.sendall(request)
        # A single recv may deliver only part of the headers; keep reading
        # until the blank line that ends them, EOF, or the size cap.
        response = ''
        while '\r\n\r\n' not in response and len(response) < max_header_bytes:
            chunk = s.recv(4096)
            if not chunk:
                break
            response += chunk
    finally:
        s.close()
    return _location_from_response(response)


if __name__ == '__main__':
    # Run every fetcher once against the same short link; each one prints
    # its own timing report through the @timing decorator.
    url = "https://t.co/1c1aqGnBA8"
    get_by_socket(url, method='HEAD')
    for fetch in (get_urllib, get_urllib2, get_urllib3, get_requests):
        fetch(url)