ayaniimi213
7/16/2014 - 5:21 AM

Extract comments from Niconama Comment Viewer

Extract comments from Niconama Comment Viewer

#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# extract comments from Niconama Comment Viewer
require "xmlsimple" # gem install xml-simple
require "nkf"
# nkf flags applied to every comment before filtering. Per the nkf manual:
# -w emits UTF-8, -h converts katakana to hiragana, and -Z1 converts
# full-width alphanumerics to ASCII and the ideographic space to a plain space.
NKF_OPTIONS = "-w -h -Z1"
require "pp"

# Ordered [pattern, replacement] rules applied to each comment.
# Order matters: tags are stripped before entities are decoded, so a literal
# "<" produced by decoding "&lt;" is never mistaken for markup.
COMMENT_FILTERS = [
  [%r{/telop show0}, ""],
  [%r{/hb ifseetno \d+}, ""],

  # remove tags
  [/<a href=([^>]*)>/, ""],
  [%r{</a>}, ""],
  [/<u>/, ""],
  [%r{</u>}, ""],

  # character entity references
  # NOTE(review): &amp; and &quot; are not decoded here -- confirm whether the
  # viewer ever writes them before adding rules (&amp; would have to go last).
  [/&lt;/, "<"],
  [/&gt;/, ">"],
  [/&apos;/, "'"],

  # slash commands, removed together with their arguments where they take any
  [%r{/perm .*$}, ""],
  [%r{/telop on .*$}, ""],
  [%r{/telop off}, ""],
  [%r{/cls}, ""],
  [%r{/vote start .*$}, ""],
  [%r{/vote showresult per .*$}, ""],
  [%r{/vote stop}, ""],

  [%r{/play .*}, ""],
  [%r{/telop show 実況に接続しました}, ""],
  [%r{/disconnect}, ""]
].freeze

# Applies every rule in COMMENT_FILTERS, in order, to +text+.
#
# @param text [String] comment text (not modified; may be frozen)
# @return [String] a new string with commands and tags removed and the
#   supported entities decoded
def strip_control_text(text)
  COMMENT_FILTERS.reduce(text) do |cleaned, (pattern, replacement)|
    cleaned.gsub(pattern, replacement)
  end
end

# Builds the output line for one parsed <chat> element.
#
# @param chat [Hash] reads "content" (comment text) and "date" (epoch seconds)
# @return [String] "<local time>:<filtered comment>\n"
def format_chat(chat)
  # An element without text has no "content" key; to_s keeps nkf from
  # raising TypeError on nil.
  normalized = NKF.nkf(NKF_OPTIONS, chat["content"].to_s)
  timestamp = Time.at(chat["date"].to_i)
  "#{timestamp}:#{strip_control_text(normalized)}\n"
end

# Parses the comment log at +path+ and prints one line per chat entry.
#
# @param path [String] path to the XML comment log
# @return [void]
def print_comments(path)
  # Block form closes the file handle even when parsing raises.
  log = File.open(path) { |io| XmlSimple.xml_in(io) }

  # A log without any chat entries yields nil here rather than an array.
  chats = log.dig("LiveCommentDataArray", 0, "chat") || []
  chats.each { |chat| print format_chat(chat) }
end

file = ARGV.shift

if file
  print_comments(file)
else
  warn "usage: #{$PROGRAM_NAME} COMMENT_LOG.xml"
end