joetyson
7/18/2011 - 7:31 AM

Super basic protobuf string encode/decode proof of concept

Super basic protobuf string encode/decode proof of concept

def varint32(v):
  """Encode an integer as a protobuf base-128 varint.

  The value is split into 7-bit groups, least-significant group first.
  Every byte except the last has its most significant bit set to signal
  that another group follows.

  Negative values are encoded as their 64-bit two's-complement
  representation, which always occupies ten bytes.

  Args:
    v: the integer to encode; must be >= -2**63.

  Returns:
    An array.array('B') holding the encoded bytes.

  Raises:
    ValueError: if v is below -2**63 and so has no 64-bit
      two's-complement form.
  """
  if v < 0:
    if v < -(1 << 63):
      raise ValueError('varint value out of 64-bit range: %r' % (v,))
    # Without this, a negative v never reaches zero under ">>= 7"
    # (it settles at -1) and the loop below would never terminate.
    v += 1 << 64
  result = array.array('B')
  while True:
    bits = v & 127
    v >>= 7  # move on to the next 7 bits
    if not v:
      # Final group: continuation bit left clear.
      result.append(bits)
      return result
    # More groups follow: set the continuation bit (msb).
    result.append(bits | 128)

def get_varint32(arr):
  """Decode one base-128 varint from the start of a byte sequence.

  Args:
    arr: an indexable sequence of integers in 0..255 (for example an
      array.array('B'), a bytearray or a list) that begins with a varint.

  Returns:
    A tuple (count, value): count is the number of bytes the varint
    occupied and value is the decoded non-negative integer.

  Raises:
    IndexError: if arr is empty or ends before a byte with a clear
      continuation bit is reached.
  """
  result = 0
  shift = 0
  idx = 0
  while True:
    # Read from the sequence itself; the previous code indexed the
    # current byte, which failed on every multi-byte varint.
    b = arr[idx]
    idx += 1
    # The low 7 bits carry payload, least-significant group first.
    result |= (b & 127) << shift
    # A clear msb marks the final group.
    if not (b & 128):
      return (idx, result)
    shift += 7


class FieldType:
  """Wire-type codes stored in the low three bits of an encoded field key."""
  VARINT = 0   # value is a base-128 varint
  FIXED64 = 1  # wire type code 1
  STRING = 2   # varint length prefix followed by that many bytes
  FIXED32 = 5  # wire type code 5


def encode_string(tag, v):
  """Encode one length-delimited string field.

  The output is the field key (tag number and wire type, as a varint),
  then the payload length as a varint, then the payload bytes.

  Args:
    tag: the field number.
    v: the payload. Byte strings are written unchanged; any other
      string is encoded as UTF-8 first so that the length prefix counts
      bytes rather than characters.

  Returns:
    The encoded field as a byte string.
  """
  if not isinstance(v, (bytes, bytearray)):
    v = v.encode('utf-8')
  wire_tag = (tag << 3) | FieldType.STRING
  # The key is itself a varint: appending it as a single raw byte
  # overflows for any tag of 32 or above.
  buf = bytearray(varint32(wire_tag))
  buf.extend(varint32(len(v)))
  buf.extend(v)
  return bytes(buf)

def decode_string(encoded):
  """Decode one length-delimited string field.

  Args:
    encoded: a byte string that starts with the field key varint,
      followed by the payload length varint and the payload bytes.

  Returns:
    A tuple (tag, payload): the field number and the payload as a byte
    string.

  Raises:
    AssertionError: if the wire type in the key is not
      FieldType.STRING.
    ValueError: if the declared payload length runs past the end of
      the input.
  """
  arr = bytearray(encoded)
  idx, encoded_tag = get_varint32(arr)
  tag = encoded_tag >> 3
  wire_type = encoded_tag & 7
  # Raised explicitly (rather than via an assert statement) so the check
  # still runs when Python is started with -O.
  if wire_type != FieldType.STRING:
    raise AssertionError('Oh noes, not a string. wtf')
  # Next up is a varint holding the payload length in bytes.
  pos, length = get_varint32(arr[idx:])
  start = idx + pos
  end = start + length
  if end > len(arr):
    # Slicing would otherwise silently return a shortened payload.
    raise ValueError(
        'string field truncated: need %d bytes, have %d'
        % (length, len(arr) - start))
  return (tag, bytes(arr[start:end]))