# baobao
# 11/4/2017 - 8:19 AM
#
# beautifulsoup0.py

from bs4 import BeautifulSoup

# Sample document used to demonstrate the different BeautifulSoup lookup styles.
html = """
<html><body>
    <h1><a href="http://www.shibuya24.info/">渋谷ほととぎす通信</a></h1>
    <p>ブログ更新が</p>
    <p>とてもとても</p>
    <div id="piyo">滞っています</div>
<ul id="test">
    <li><a href="http://www.shibuya24.info/archive/category/Unity">Unity</a></li>
    <li><a href="http://www.shibuya24.info/archive/category/DOTween">DOTween</a></li>
</ul>
<p class="hoge">でも頑張ります</p>
</body></html>
"""
# The second argument selects which parser to use.
soup = BeautifulSoup(html, 'html.parser')
# Attribute-style navigation: each step returns the first matching descendant.
h1 = soup.html.body.h1

# First <p> element.
p1 = soup.html.p
# Second <p> element. Ask for the next <p> sibling explicitly instead of
# hopping over two raw siblings, which only works while exactly one
# whitespace text node sits between the two tags.
p2 = p1.find_next_sibling("p")

# Look up the element with id="piyo": find() returns a single Tag,
# select() returns a list of every match for the CSS selector.
idPiyo = soup.find(id="piyo")
idPiyo2 = soup.select("#piyo")

# Look up elements by class, again as a single Tag and as a list.
classHoge = soup.find(class_="hoge")
classHoge2 = soup.select(".hoge")

# Collect every <a> element in the document.
linkGroup = soup.find_all("a")

# Select by DOM structure: <li> elements that are direct children of ul#test.
listGroup = soup.select("ul#test > li")

# Print the text of each element found by the lookups above.
print("h1:", h1.string)
print("p1:", p1.string)
print("p2:", p2.string)
print("idPiyo", idPiyo.string)
# Iterate the select() result list (idPiyo2), mirroring the classHoge2 loop
# below. Looping over idPiyo itself would walk the children of that single
# Tag rather than the list of matches.
for piyo in idPiyo2:
    print("piyo", piyo.string)
print("classHoge", classHoge.string)
for hoge in classHoge2:
    print("classHoge", hoge.string)
# Print the target URL of every link.
for link in linkGroup:
    href = link.attrs['href']
    print("URL", href)

# Loop variable is named `item` so the builtin `list` is not shadowed.
for item in listGroup:
    print(item.string)