ajib6ept
1/28/2019 - 6:04 PM

lxml example: parsing an HTML string and querying it with XPath


import lxml.html

# Small, self-contained HTML document used as input for the example.
html = """<html>
            <head>
                <title>MyTitle</title>
            </head>
            <body>
                <h1>Header</h1>
                <a href = "ya.ru">1</a>
                </body>
            </html>"""

# Parse the markup into an lxml element tree.
doc = lxml.html.fromstring(html)

# An XPath expression ending in text() returns a list of text results.
title_texts = doc.xpath('//title/text()')
print(title_texts)  # ['MyTitle']

# Selecting the element and reading .text yields a single string instead.
title_nodes = doc.xpath('//title')
title = title_nodes[0].text
print(title)  # MyTitle

# Inspect the first <title> element: tag name, text, serialized markup
# and the tag of its parent element.
title_elem = doc.xpath('//title')[0]

print("title tag:", title_elem.tag)
print("title text:", title_elem.text_content())
# tostring() returns bytes by default, which would print as b'...';
# encoding='unicode' makes it return a str so the markup prints as-is.
print("title html:", lxml.html.tostring(title_elem, encoding='unicode'))
print("title's parent's tag:", title_elem.getparent().tag)

# Take the first <a> element in the document and print its href attribute.
anchors = doc.xpath('//a')
first_link = anchors[0]
link_target = first_link.attrib['href']
print(link_target)  # ya.ru