hackugyo
2/12/2017 - 5:38 AM

UNEXTのマイリストから,それぞれの配信期限を取得してcsv形式で出力します.

UNEXTのマイリストから,それぞれの配信期限を取得してcsv形式で出力します.

# coding: utf-8
require 'csv'

require 'bundler'
Bundler.require
require 'capybara/poltergeist'

# Converts a Japanese-formatted date such as "2017年2月28日 23:59まで" into a
# zero-padded "YYYYMMDD" string. Lexical order of the result matches
# chronological order, so it can be used directly as a sort key.
#
# @param date_text [String, nil] text containing year, month and day numbers
#   separated by kanji (e.g. 年 / 月)
# @return [String] "YYYYMMDD", or "99999999" when no date can be found so that
#   undated entries sort after every dated one instead of raising
def normalize(date_text)
  match = date_text.to_s.match(/(\d+)[一-龠]+(\d+)[一-龠]+(\d+)/)
  return '99999999' unless match

  # Convert explicitly in base 10: format("%02d", "08") parses the string like
  # an Integer literal and rejects "08" / "09" as invalid octal.
  year, month, day = match.captures.map { |digits| digits.to_i(10) }
  format('%04d%02d%02d', year, month, day)
end

# My-list page to scrape; the script logs in right after visiting it.
target_url = "http://video.unext.jp/mylist/favorite"

# One scraped title: detail-page URL, title text, availability-limit text and price text.
Movie = Struct.new(:url, :title, :limit, :price)

# Register the Poltergeist driver with JavaScript error reporting disabled.
# NOTE(review): Poltergeist documents :timeout in seconds, so 5000 is a very
# generous value — confirm that this is intended.
Capybara.register_driver(:poltergeist) do |app|
  driver_options = { js_errors: false, timeout: 5000 }
  Capybara::Poltergeist::Driver.new(app, driver_options)
end

# The login form is located with XPath expressions, so switch the default from :css.
Capybara.default_selector = :xpath

session = Capybara::Session.new(:poltergeist)
session.driver.headers = { 'User-Agent' => "Mozilla/5.0 (Macintosh; Intel Mac OS X)" }
session.visit(target_url)

# Log in.
# Read both credentials up front so that a missing environment variable fails
# fast with a KeyError naming the variable, instead of handing nil to the browser.
unext_id = ENV.fetch('UNEXT_ID')
unext_password = ENV.fetch('UNEXT_PASSWORD')

email_input = session.find(:xpath, "/html/body/div[1]/div/div/div/div[1]/form/div[1]/label[1]/input")
email_input.native.send_key(unext_id)

password_input = session.find(:xpath, "/html/body/div[1]/div/div/div/div[1]/form/div[1]/label[2]/input")
password_input.native.send_key(unext_password)

# Everything after the login form is located with CSS selectors.
Capybara.default_selector = :css
login_button = session.find(:css, ".js-acc-login-btn--submit")
login_button.click

# Load the whole list: keep scrolling down until the number of items stops growing.
previous_count = session.all(".ui-item-v__link").count
loop do
  session.execute_script "window.scrollBy(0,10000)"
  sleep 3 # give the page time to append the next batch of items
  latest_count = session.all(".ui-item-v__link").count
  break if latest_count == previous_count

  previous_count = latest_count
end

# Fetch each title's detail page and read its title, availability limit and price.
links = session.all(".ui-item-v__link")

agent = Mechanize.new
results = links.map do |link|
  href = link[:href]
  page = agent.get(href)
  Movie.new(
    href,
    page.search('/html/body/div[1]/div/div/div[3]/div/div/div[1]/div[2]/h1').text,
    page.search('/html/body/div[1]/div/div/div[4]/div[2]/div/div[1]/span').text,
    page.search('/html/body/div[1]/div/div/div[3]/div/div/div[1]/div[4]/div/span').text
  )
end

# Print one CSV row (url, limit, title, price) per title, earliest limit first.
# CSV.generate_line quotes any field containing a comma, double quote or
# newline, so such a title no longer shifts or breaks the columns.
results
  .sort_by { |movie| normalize(movie.limit) }
  .each { |movie| print CSV.generate_line([movie.url, movie.limit, movie.title, movie.price]) }

# Credentials that unext.rb reads from the environment; replace the placeholders.
export UNEXT_ID="your id"
export UNEXT_PASSWORD="your password"

How to use

$ source application.env # create your own application.env first
$ bundle install --path vendor/bundle
$ ruby unext.rb > result.csv
GEM
  remote: https://rubygems.org/
  specs:
    addressable (2.5.0)
      public_suffix (~> 2.0, >= 2.0.2)
    capybara (2.12.0)
      addressable
      mime-types (>= 1.16)
      nokogiri (>= 1.3.3)
      rack (>= 1.0.0)
      rack-test (>= 0.5.4)
      xpath (~> 2.0)
    cliver (0.3.2)
    domain_name (0.5.20161129)
      unf (>= 0.0.5, < 1.0.0)
    http-cookie (1.0.3)
      domain_name (~> 0.5)
    mechanize (2.7.5)
      domain_name (~> 0.5, >= 0.5.1)
      http-cookie (~> 1.0)
      mime-types (>= 1.17.2)
      net-http-digest_auth (~> 1.1, >= 1.1.1)
      net-http-persistent (~> 2.5, >= 2.5.2)
      nokogiri (~> 1.6)
      ntlm-http (~> 0.1, >= 0.1.1)
      webrobots (>= 0.0.9, < 0.2)
    mime-types (3.1)
      mime-types-data (~> 3.2015)
    mime-types-data (3.2016.0521)
    mini_portile2 (2.1.0)
    net-http-digest_auth (1.4.1)
    net-http-persistent (2.9.4)
    nokogiri (1.7.0.1)
      mini_portile2 (~> 2.1.0)
    ntlm-http (0.1.1)
    poltergeist (1.13.0)
      capybara (~> 2.1)
      cliver (~> 0.3.1)
      websocket-driver (>= 0.2.0)
    public_suffix (2.0.5)
    rack (2.0.1)
    rack-test (0.6.3)
      rack (>= 1.0)
    unf (0.1.4)
      unf_ext
    unf_ext (0.0.7.2)
    webrobots (0.1.2)
    websocket-driver (0.6.5)
      websocket-extensions (>= 0.1.0)
    websocket-extensions (0.1.2)
    xpath (2.0.0)
      nokogiri (~> 1.3)

PLATFORMS
  ruby

DEPENDENCIES
  capybara
  mechanize
  poltergeist

BUNDLED WITH
   1.14.3
# Gems used by the U-NEXT my-list scraper.
source 'https://rubygems.org'

gem 'mechanize'   # fetches and parses each title's detail page
gem 'capybara'    # browser session used to log in and scroll the list
gem 'poltergeist' # Capybara driver registered by the script
### https://raw.github.com/github/gitignore/26a147d00d26a47389bc28408afe1ee62bd78387/ruby.gitignore

*.gem
*.rbc
/.config
/coverage/
/InstalledFiles
/pkg/
/spec/reports/
/spec/examples.txt
/test/tmp/
/test/version_tmp/
/tmp/

# Used by dotenv library to load environment variables.
# .env

## Specific to RubyMotion:
.dat*
.repl_history
build/
*.bridgesupport
build-iPhoneOS/
build-iPhoneSimulator/

## Specific to RubyMotion (use of CocoaPods):
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
# vendor/Pods/

## Documentation cache and generated files:
/.yardoc/
/_yardoc/
/doc/
/rdoc/

## Environment normalization:
/.bundle/
/vendor/bundle
/lib/bundler/man/

# for a library or gem, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# Gemfile.lock
# .ruby-version
# .ruby-gemset

# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
.rvmrc


vendor/bundle
application.env