morristech
5/21/2019 - 6:03 AM

Recognize URLs in HTML and extract the id part.

Recognize URLs in HTML and extract the id part.

#!/usr/bin/python
# -*- coding: utf-8 -*-
import re

# Matches a double-quoted Centadata estate-info link as it appears in an href.
# Group 1 is the opening quote plus the URL up to "id=", group 2 is the
# numeric id. The literal is a raw string so the regex escapes (\. and \?)
# reach the regex engine verbatim; in a plain string literal they are invalid
# string escapes and trigger a warning on modern Python.
p = re.compile(r'("http://hk\.centadata\.com/ccichart/estate_info\.aspx\?id=)([0-9]+)')

# Sample HTML fragment containing two estate links.
s = '<a href="http://hk.centadata.com/ccichart/estate_info.aspx?id=008600" target="_top"> sdfasdfasd <a href="http://hk.centadata.com/ccichart/estate_info.aspx?id=05600" target="_top">'


def extract_ids(html):
    """Return the estate ids found in *html*, in order of appearance.

    Ids are returned as strings (not ints) so leading zeros are preserved.
    An input with no matching link yields an empty list.
    """
    # group(2) is the digit run after "id="; group(1) is only the URL prefix.
    return [match.group(2) for match in p.finditer(html)]


for estate_id in extract_ids(s):
    print(estate_id)