# python snippets

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#author:         rex
#blog:           http://iregex.org

import re

from os.path import dirname as dirname
from os.path import join as pathjoin

def forceUTF8():
    '''Make 'utf-8' the interpreter-wide default string encoding.

    Does nothing when sys.getdefaultencoding() already reports 'utf-8'
    (which is always the case on Python 3).  Otherwise the Python 2 idiom
    is applied: reload(sys) to get sys.setdefaultencoding back, then call
    it with 'utf-8'.
    '''
    import sys
    default_encoding = 'utf-8'

    if sys.getdefaultencoding() != default_encoding:
        # Python 2 only: `reload` is a builtin there, and the site module
        # removes sys.setdefaultencoding at startup, hence the reload.
        reload(sys)  # noqa: F821
        sys.setdefaultencoding(default_encoding)

def getPath(sufix=""):
    '''Return the directory of this file joined with *sufix*.

    The directory is dirname(__file__).  If it contains "..", everything
    from the first ".." onward is dropped before joining.  Backslashes in
    the result are replaced with forward slashes.
    '''
    path = dirname(__file__)
    # str.find returns -1 when ".." is absent, which is what the check
    # below expects; str.index would raise ValueError instead.
    index = path.find("..")
    if index != -1:
        path = path[:index]
    return pathjoin(path, sufix).replace('\\', '/')

from hashlib import md5
def md5ize(s):
    '''Return the hexadecimal MD5 digest of *s*.

    *s* may be a byte string or a text (unicode) string.  Text is encoded
    as UTF-8 before hashing, because md5() operates on bytes.
    '''
    if isinstance(s, type(u'')):
        s = s.encode('utf-8')
    return md5(s).hexdigest()

#integer file size to string

def format_size(size):
    '''Render an integer byte count as a short human-readable string.

    Values below 1024 are shown as "<n> Byte".  Larger values are scaled
    by a power of 1024 and labelled "Kb", "Mb" or "Gb"; the scaled number
    is rounded down to a whole number.  Anything of 1024**3 bytes or more
    is expressed in "Gb".
    '''
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024

    if size < kib:
        return "%s Byte" % size
    # Floor division keeps the result a whole number on every Python
    # version (plain "/" yields a float on Python 3).
    if size < mib:
        return "%s Kb" % (size // kib)
    if size < gib:
        return "%s Mb" % (size // mib)
    # Sizes this large used to fall through every branch and return None.
    return "%s Gb" % (size // gib)

#get parameter from url:

def get_V(p, regex=None):
    """Return the CGI request parameter *p* with surrounding whitespace stripped.

    The value is read with cgi.FieldStorage().getfirst(p); None is returned
    when that yields nothing (missing or empty parameter).

    If *regex* is provided, the stripped value is returned only when
    re.match(regex + "$", value) succeeds; otherwise None is returned.
    Note: it uses re.match to check, not re.search, so the pattern is
    anchored at the start, and the appended "$" anchors it at the end.
    Because "$" is simply appended, a top-level alternation such as "a|b"
    only has its last branch end-anchored; wrap it in a group if needed.
    """
    import cgi
    form = cgi.FieldStorage()
    value = form.getfirst(p)

    if not value:
        return None
    value = value.strip()

    # No verification requested: hand back the stripped value as is.
    if regex is None:
        return value

    if re.match(regex + "$", value):
        return value
    return None

def stringio():
    '''Snippet: collect data passed to a WRITEFUNCTION callback in memory.

    NOTE(review): neither `StringIO` nor `c` is defined in this function or
    in the visible part of the file; `c` looks like a pycurl handle --
    confirm.  The collected text is bound to a local name and not returned.
    '''
    sink = StringIO.StringIO()
    c.setopt(c.WRITEFUNCTION, sink.write)
    captured = sink.getvalue()

def Do(cmdstr):
    '''Run *cmdstr* through getoutput() and return the resulting string.

    If running the command raises, the exception is not propagated; its
    message (str(e)) is returned instead.

    NOTE: *cmdstr* is passed to getoutput() unmodified, which executes it
    in a shell -- never build it from untrusted input.
    '''
    try:
        from commands import getoutput  # Python 2
    except ImportError:
        # The `commands` module was removed in Python 3; subprocess
        # provides the same helper there.
        from subprocess import getoutput

    try:
        return getoutput(cmdstr)
    except Exception as e:
        return str(e)

import urllib

def getHeader(url):
    '''Open *url* and return its header information.

    The returned value is the `headers.dict` attribute of the object
    produced by urllib.urlopen(url).  The handle is closed before
    returning, since only the headers are needed.
    '''
    remotefile = urllib.urlopen(url)
    try:
        return remotefile.headers.dict
    finally:
        # Release the connection instead of leaving it to garbage collection.
        remotefile.close()

def getRemoteFileLength(url, unit='k'):
    '''Return the length of a remote file as a string such as "12.50K".

    url:  location passed to urllib.urlopen.
    unit: 'b', 'k', 'm' or 'g' (any case); the length is divided by
          1, 1024, 1024**2 or 1024**3 respectively.

    The length comes from the 'content-length' response header, so the
    file is normally not downloaded.  When that header is missing, a
    notice is printed and the whole body is read to measure it.

    Raises KeyError when *unit* is not one of the supported letters.
    '''
    unit = unit.upper()
    units = {
        'B': 1,
        'K': 1024,
        'M': 1024 * 1024,
        'G': 1024 * 1024 * 1024,
    }
    # Resolve the divisor before touching the network so that an
    # unsupported unit fails (with the same KeyError) without a request.
    divisor = units[unit]

    remotefile = urllib.urlopen(url)
    try:
        try:
            length = remotefile.headers.dict['content-length']
        except KeyError:
            print('no length infor. loading complete file to caclulate length')
            length = len(remotefile.read())
    finally:
        remotefile.close()

    reallen = float(length) / divisor

    formatedLength = "%.2f%s" % (reallen, unit)
    return formatedLength

#escaping html
# Maps each character that html_escape() replaces to its entity.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}

def html_escape(text):
    """Produce entities within text.

    Each character of *text* that has an entry in html_escape_table is
    replaced by that entry; every other character is copied unchanged.
    """
    return "".join(html_escape_table.get(c, c) for c in text)

from HTMLParser import HTMLParser

def unescape(s):
    '''Decode HTML character references in *s*, applying two passes.

    The second pass means doubly-escaped input such as "&amp;lt;" comes
    out as "<".
    '''
    try:
        from HTMLParser import HTMLParser as _Parser  # Python 2
        # Call the method on a real instance rather than reaching through
        # __func__ with the class standing in for `self`.
        decode = _Parser().unescape
    except ImportError:
        # Python 3: the HTMLParser module is gone; html.unescape is the
        # documented replacement.
        from html import unescape as decode

    return decode(decode(s))

def err(msg):
    '''Write str(msg) followed by a newline to standard error.'''
    import sys
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # Python-2-only "print >> stream" form.
    sys.stderr.write(str(msg) + "\n")

def getkey():
    """Get a key press without Enter.

    Turns off ICANON and ECHO on standard input, reads one byte from it
    with os.read, and puts the previous terminal settings back afterwards,
    even if the read fails.  Returns what os.read(fd, 1) returned.
    """
    import os
    import sys
    import termios

    fd = sys.stdin.fileno()
    old = termios.tcgetattr(fd)
    new = termios.tcgetattr(fd)
    # tcgetattr returns [iflag, oflag, cflag, lflag, ispeed, ospeed, cc]:
    # index 3 is the local-mode flags, index 6 the control-character list.
    new[3] = new[3] & ~termios.ICANON & ~termios.ECHO
    new[6][termios.VMIN] = 1
    new[6][termios.VTIME] = 0
    termios.tcsetattr(fd, termios.TCSANOW, new)
    c = None
    try:
        c = os.read(fd, 1)
    finally:
        # Always restore the saved settings so the terminal is not left
        # with echo and line buffering disabled.
        termios.tcsetattr(fd, termios.TCSAFLUSH, old)
    return c