渋谷区図書館からいまあなたが借りている本のタイトルを取り出します
# coding: utf-8
require 'mechanize'
require 'optparse'
# Logs in to the Taito City library OPAC (www.taitocity.net) and prints, one
# per line, the text of the fifth cell of every row after the first in the
# table found on the OPP1000 page (presumably the titles of the books on loan).
#
# Usage: ruby <script> --rid USER_ID --password PASSWORD
params = ARGV.getopts('', 'rid:', 'password:')

# Fail early with a usage hint instead of submitting empty credentials.
missing_options = %w[rid password].reject { |name| params[name] }
unless missing_options.empty?
  flags = missing_options.map { |name| "--#{name}" }.join(', ')
  abort "Missing required option(s): #{flags}"
end

client = Mechanize.new
client.follow_meta_refresh = true
client.redirect_ok = true
client.user_agent_alias = 'Mac Safari'

# Fill in and submit the form named LOGIN on the OPAC top page.
top_page = client.get('https://www.taitocity.net/taito-opac/index.jsp')
login_form = top_page.form_with(name: 'LOGIN')
abort 'Login form "LOGIN" was not found on the OPAC top page.' unless login_form

login_form['USERID'] = params['rid']
login_form['PASSWORD'] = params['password']
# The page returned by the submit is not used; the list page is fetched
# separately below with the same client.
login_form.click_button

list_page = client.get('https://www.taitocity.net/taito-opac/OPP1000')
doc = Nokogiri::HTML(list_page.body)
rows = doc.css('body > table:nth-child(5) > tr:nth-child(1) > td:nth-child(1) > table:nth-child(1) > tr:nth-child(2) > td:nth-child(1) > form:nth-child(2) > table:nth-child(4) > tr:nth-child(1) > td:nth-child(1) > table:nth-child(1)')
          .search('tr')

# The first row is dropped (presumably the header row). Rows with fewer than
# five cells are skipped rather than raising NoMethodError on nil.
results = rows.drop(1)
              .map { |row| row.search('td')[4] }
              .compact
              .map { |cell| cell.text.strip }
puts results.join("\n")
# coding: utf-8
require 'mechanize'
require 'optparse'
# Logs in to the Shibuya City library site and prints, one per line, the text
# found in the third child of every row of the table with id "dgdKas"
# (per the project description, the titles of the books currently on loan).
#
# Usage: ruby <script> --rid USER_ID --password PASSWORD
params = ARGV.getopts('', 'rid:', 'password:')

# Fail early with a usage hint instead of posting empty credentials.
missing_options = %w[rid password].reject { |name| params[name] }
unless missing_options.empty?
  flags = missing_options.map { |name| "--#{name}" }.join(', ')
  abort "Missing required option(s): #{flags}"
end

client = Mechanize.new
login_url = 'https://www.lib.city.shibuya.tokyo.jp/asp/WwJouNinshou.aspx'
login_page = client.get(login_url)
form = login_page.form_with(name: 'Form1')
abort 'Login form "Form1" was not found on the login page.' unless form

# The values the page already carries in inpMel, __EVENTVALIDATION and
# __VIEWSTATE are echoed back unchanged. Form#[] yields nil for an absent
# field instead of raising.
post_params = {
  btnKakunin: "確 認",
  inpMel: form['inpMel'],
  txtPassword: params['password'],
  txtRiyoshaCD: params['rid'],
  __EVENTVALIDATION: form['__EVENTVALIDATION'],
  __VIEWSTATE: form['__VIEWSTATE']
}
result_page = client.post(login_url, post_params)

doc = Nokogiri::HTML(result_page.body)
loans_table = doc.css('table#dgdKas').first
unless loans_table
  # No table in the response: ask the user to retry later.
  puts "しばらく時間をおいてから試してください"
  exit
end

# For each child of the table, take its third child node and read the text
# below that node's first child. Children that lack either level are skipped
# rather than raising NoMethodError on nil.
result = []
loans_table.children.each do |row|
  cell = row.children[2]
  next unless cell

  wrapper = cell.children[0]
  next unless wrapper

  result << wrapper.children.inner_text.strip
end
puts result.join("\n")
GEM
remote: https://rubygems.org/
specs:
domain_name (0.5.25)
unf (>= 0.0.5, < 1.0.0)
http-cookie (1.0.2)
domain_name (~> 0.5)
mechanize (2.7.3)
domain_name (~> 0.5, >= 0.5.1)
http-cookie (~> 1.0)
mime-types (~> 2.0)
net-http-digest_auth (~> 1.1, >= 1.1.1)
net-http-persistent (~> 2.5, >= 2.5.2)
nokogiri (~> 1.4)
ntlm-http (~> 0.1, >= 0.1.1)
webrobots (>= 0.0.9, < 0.2)
mime-types (2.6.2)
mini_portile (0.6.2)
net-http-digest_auth (1.4)
net-http-persistent (2.9.4)
nokogiri (1.6.6.2)
mini_portile (~> 0.6.0)
ntlm-http (0.1.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.1)
webrobots (0.1.1)
PLATFORMS
ruby
DEPENDENCIES
mechanize
nokogiri
# Gem dependencies of the library scraper scripts, fetched from rubygems.org.
source "https://rubygems.org"
# gem "rails"
# Nokogiri::HTML is called directly by the scripts to parse fetched pages.
gem "nokogiri"
# Mechanize performs the HTTP requests and form handling in the scripts.
gem "mechanize"
vendor
.bundle
scraper_lib_shibuya