Recognize URLs in HTML and extract the id part of each
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
# Pattern for a double-quoted estate_info link. Group 1 is the URL prefix
# (including the opening quote); group 2 is the run of digits after "id=".
# A raw string is used so the regex escapes (\. and \?) are passed to the
# regex engine verbatim instead of being treated as (invalid) string escapes.
p = re.compile(r'("http://hk\.centadata\.com/ccichart/estate_info\.aspx\?id=)([0-9]+)')

# Sample HTML fragment containing two matching links.
s = '<a href="http://hk.centadata.com/ccichart/estate_info.aspx?id=008600" target="_top"> sdfasdfasd <a href="http://hk.centadata.com/ccichart/estate_info.aspx?id=05600" target="_top">'

# With two capture groups, findall yields one (prefix, id) tuple per match;
# index 1 is the id. Ids are printed as strings, so leading zeros are kept.
for match in p.findall(s):
    print(match[1])