r15ch13
2/6/2015 - 2:18 AM

Nickelodeon Downloader

#!/usr/bin/env python3
#
# Downloads episodes from nick.com
# based on http://www.reddit.com/r/TheLastAirbender/comments/28peyk/script_to_download_all_the_episodes_from_nick/

import subprocess
import sys
import os
from ntpath import basename
from urllib import request
from xml.dom import minidom
from bs4 import BeautifulSoup

USE_PROXY = False
PROXY_URL = 'us.premiumize.me'
PROXY_PORT = '82'
PROXY_USER = ''
PROXY_PASS = ''

FFMPEG = 'ffmpeg.exe' # path to the ffmpeg binary ('ffmpeg' on Linux)
LIVESTREAMER = 'livestreamer.exe' # path to the livestreamer binary ('livestreamer' on Linux)
USE_LIVESTREAMER = True # if False, ffmpeg fetches and concatenates the streams itself
DOWNLOAD_DIR = 'download'

FEED_URL = (
    'http://udat.mtvnservices.com/service1/dispatch.htm?'
    'feed=nick_arc_player_prime&plugin.stage=live&'
    'mgid={mgid}')
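# {mgid} is filled with the content id scraped from the episode page.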

def downloadWithLivestreamer(urls):
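  """Download each stream URL in parallel with livestreamer and return the local file paths."""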

  videos = []
  commands = []

  for url in urls:
    video = os.path.join(DOWNLOAD_DIR, basename(url))
    videos.append(video)
    commands.append([
      which(LIVESTREAMER), '--loglevel', 'error',
      url, '-f', '-o', video, '--default-stream', 'best'
    ])

  # run in parallel
  print("Spawing {} livestreamer instances\n".format(len(commands)))
  processes = [subprocess.Popen(cmd, universal_newlines=True) for cmd in commands]

  # do other things here..
  # wait for completion
  for p in processes: p.wait()

  return videos

def download(mgid, out):
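  """Resolve the media feed for `mgid`, download the highest-bitrate rendition
  of each segment, and concatenate the parts into `out`."""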

  if not os.path.exists(DOWNLOAD_DIR):
    os.mkdir(DOWNLOAD_DIR)

  urls = []

  feedUrl = FEED_URL.format(mgid=mgid)
  print("Feed url: {}".format(feedUrl))
  feed = minidom.parse(request.urlopen(feedUrl))

  for item in feed.getElementsByTagName('media:content'):
    media = minidom.parse(request.urlopen(item.getAttribute('url')))
    renditions = media.getElementsByTagName('rendition')
    best = max(renditions, key=lambda x: int(x.getAttribute('bitrate')))
    url = best.getElementsByTagName('src')[0].firstChild.nodeValue
    urls.append(url)
    print("Found: {}".format(url))

  if USE_LIVESTREAMER:
    # Use livestreamer to download, and concat with ffmpeg
    videos = downloadWithLivestreamer(urls)

    # Build an ffmpeg concat-demuxer file list ("file 'path'" per line) to feed via stdin
    txt = '\n'.join("file '{}'".format(v) for v in videos).encode('utf-8')

    print("Concatenating files to: {}".format(out))
    process = subprocess.Popen(
      [which(FFMPEG), '-y', '-loglevel', 'info', '-f', 'concat', '-i', '-', '-c', 'copy', out],
      stdin=subprocess.PIPE
    )
    # communicate() writes the file list to ffmpeg's stdin and waits for it to exit
    process.communicate(txt)

    for v in videos: os.remove(v)

    if process.returncode:
      raise subprocess.CalledProcessError(process.returncode, process.args)

  else:
    # Use pure ffmpeg to download and concat
    ffmpeg = [which(FFMPEG), '-y', '-re']

    for url in urls: ffmpeg.extend(['-i', url])

    urlCount = len(urls)

    # Join the video streams of all inputs into [v] and the audio streams into [a]
    # with the concat filter, then map both into the output file.
    filter_complex = "{1} concat=n={0} [v]; {2} concat=n={0}:v=0:a=1 [a]".format(
      urlCount,
      "".join("[{}:0]".format(x) for x in range(0, urlCount)),
      "".join("[{}:a]".format(x) for x in range(0, urlCount))
    )

    ffmpeg.extend(['-filter_complex', filter_complex, '-map', '[v]', '-map', '[a]', out])

    print("Concatenating files to: {}".format(out))
    process = subprocess.Popen(ffmpeg)
    process.wait()  # wait for ffmpeg to exit before checking its return code

    if process.returncode:
      raise subprocess.CalledProcessError(process.returncode, process.args)

def scrape(url):
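  """Fetch the episode page (optionally through the proxy) and return the mgid
  from the video player's data-contenturi attribute."""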

  if USE_PROXY:
    proxyUrl = 'http://' + PROXY_USER + ':' + PROXY_PASS + '@' + PROXY_URL + ':' + PROXY_PORT
    proxy = request.ProxyHandler({'http': proxyUrl})
    auth = request.HTTPBasicAuthHandler()
    opener = request.build_opener(proxy, auth, request.HTTPHandler)
    request.install_opener(opener)

  html = request.urlopen(url).read()

  if not html:
    print('No HTML response')
    return

  soup = BeautifulSoup(html, 'html.parser')
  vp = soup.find('div', { 'class' : 'video-player' })
  if vp:
    return vp['data-contenturi']
  else:
    print('Video container not found')
  return

def which(program):
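  """Return the full path to `program` (searching PATH if needed), or None if it cannot be found."""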
  def is_exe(fpath):
    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

  fpath, fname = os.path.split(program)
  if fpath:
    if is_exe(program):
      return program
  else:
    for path in os.environ["PATH"].split(os.pathsep):
      path = path.strip('"')

      exe_file = os.path.join(path, program)
      if is_exe(exe_file):
        return exe_file

  return None

def usage():
  print("\nNickelodeon Downloader\n")
  print("Usage: <url> <filename>\n")
  print("Example: http://www.nick.com/videos/clip/legend-of-korra-101-full-episode.html The.Legend.of.Korra.S01E01.mp4\n")

def main():

  if USE_LIVESTREAMER and not which(LIVESTREAMER):
    print("Livestreamer is not installed")
    return

  if not which(FFMPEG):
    print("FFmpeg is not installed")
    return

  if len(sys.argv) == 3:
    mgid = scrape(sys.argv[1])
    download(mgid, sys.argv[2])
  else:
    usage()

if __name__ == '__main__':
  main()