donfanning
8/15/2018 - 1:39 PM

Markov Twitter Bot

Markov Twitter Bot

# -*- coding: utf-8 -*-

require 'igo-ruby'
require 'net/http'
require 'twitter'
require 'uri'

# Text-cleaning helpers added to String for tweet text and scraped HTML.
class String
  # Tests whether a line of the text opens with an @mention.
  # Because the pattern is anchored with ^, the start of any line qualifies.
  #
  # @return [MatchData, nil] truthy when "@name" is found at a line start
  def is_mention?
    match(/^@\w+\s*/)
  end

  # Strips Twitter-specific noise so that only the free text remains.
  #
  # @return [String] a cleaned copy; the receiver is not modified
  def remove_uri
    str = gsub(/^\.?(\s*@\w+)+/, '')         # drop leading @IDs (optionally after a ".")
    str = str.gsub(/(RT|QT)\s*@?\w+.*$/, '') # drop everything from RT/QT onward
    str = str.gsub(/https?:\/\/\S+/, '')     # drop URIs, both http and https
    str.gsub(/\s+/, ' ').strip               # collapse whitespace runs and trim the ends
  end

  # Converts the anchor and line-break markup found in scraped tweets to plain text.
  #
  # @return [String] a converted copy; the receiver is not modified
  def stringify
    str = gsub(/<a\s.*?>(.*?)<\/a>/, '\1') # replace each <a> element with its inner text
    str.gsub(/<br\s*\/?>/, "\n")           # turn <br>, <br/> and <br /> into newlines
  end
end

module TwitterBot
  BEGIN_DELIMITER = '__BEGIN__'
  END_DELIMITER = '__END__'
  IGO_DIC_DIRECTORY = './ipadic' # 辞書ファイルがあるディレクトリを指定
  
  # Drives the bot: studies one account's timeline into Markov tables, then
  # posts generated tweets and replies through the Twitter client.
  class Crawler
    # Number of candidates generated before giving up (a counter running
    # from 0 through 10, as in the original build_tweet loop).
    BUILD_ATTEMPTS = 11
    # Longest text accepted for posting, measured with String#size.
    MAX_TWEET_SIZE = 140

    # @param bot_screen_name [String] screen name the bot posts as
    # @param src_screen_name [String] screen name whose tweets are studied and scraped
    def initialize(bot_screen_name, src_screen_name)
      @bot_screen_name = bot_screen_name
      @src_screen_name = src_screen_name
      @replied_users = []
      @markov = Markov.new          # chain built from ordinary tweets
      @markov_mention = Markov.new  # chain built from tweets that start with a mention
      @splitter = Splitter.new
    end

    # Performs a plain HTTP request.
    #
    # @param method [String] 'get' sends a GET; any other value sends a POST
    # @param uri_str [String] target URL; only its host, port and path are used
    # @param query [Hash] parameters, sent as the query string (GET) or the body (POST)
    # @return [Net::HTTPResponse]
    def http_query(method, uri_str, query)
      uri = URI.parse(uri_str)
      path = uri.path.empty? ? '/' : uri.path
      # encode_www_form escapes reserved characters such as "&" and "=" inside
      # keys and values; URI.encode did not, and it no longer exists in Ruby 3.
      query_string = URI.encode_www_form(query)
      Net::HTTP.start(uri.host, uri.port) do |http|
        if method == 'get'
          path += "?#{query_string}" unless query_string.empty?
          http.get(path)
        else
          http.post(path, query_string)
        end
      end
    end

    # @return [Array<String>] tweet texts scraped from the source user's favstar "recent" page
    def get_favorited_tweets
      scrape_tweets("http://favstar.fm/users/#{@src_screen_name}/recent")
    end

    # @return [Array<String>] tweet texts scraped from the source user's favstar top page
    def get_best_tweets
      scrape_tweets("http://favstar.fm/users/#{@src_screen_name}")
    end

    # Generates a tweet from the ordinary-tweet chain.
    #
    # @return [String] the first candidate that fits in MAX_TWEET_SIZE
    # @raise [StandardError] when every attempt produced an over-long text
    def build_tweet()
      BUILD_ATTEMPTS.times do
        result = @markov.build.join('')
        return result if result.size <= MAX_TWEET_SIZE # accept if within the limit
      end
      raise StandardError, 'retry limit is exceeded'
    end

    # Generates a reply from the mention chain, prefixed with "@screen_name ".
    # Retries as often as build_tweet does; previously a single over-long
    # candidate aborted the reply.
    #
    # @param screen_name [String] user being replied to
    # @return [String] the first candidate that fits in MAX_TWEET_SIZE
    # @raise [StandardError] when every attempt produced an over-long text
    def build_reply(screen_name)
      BUILD_ATTEMPTS.times do
        result = "@#{screen_name} #{@markov_mention.build.join('')}"
        return result if result.size <= MAX_TWEET_SIZE # accept if within the limit
      end
      raise StandardError, 'retry limit is exceeded'
    end

    # Feeds the source user's timeline into the two chains: tweets that start
    # with a mention go to the mention chain, all others to the ordinary chain.
    def study
      Twitter.user_timeline(@src_screen_name, {
        "count" => 200,
      }).each do |status|
        formatted = status.text.remove_uri
        words = @splitter.split(formatted)
        if status.text.is_mention?
          @markov_mention.study(words)
        else
          @markov.study(words)
        end
        puts "study: #{formatted}"
      end
    end

    # Replies once to each user who mentioned the bot within the last 24 hours
    # and has not been replied to yet.
    def reply_to_mentions
      # Collect the users the bot has already replied to.
      Twitter.user_timeline(@bot_screen_name).each do |status|
        screen_name = status.in_reply_to_screen_name
        @replied_users << screen_name if screen_name
      end
      # Send the replies.
      Twitter.mentions.each do |status|
        screen_name = status.user.screen_name
        next if status.created_at < Time.now - 3600 * 24 # skip mentions older than 24 hours
        next if @replied_users.include?(screen_name)     # skip users already replied to
        next if screen_name == @bot_screen_name          # skip the bot itself
        result = build_reply(screen_name)
        Twitter.update(result, {
          "in_reply_to_status_id" => status.id,
        })
        @replied_users << screen_name # remember that this user has been answered
        puts "reply: #{result}"
      end
    end

    # Posts one tweet. The mode is picked at random: 80% generated text,
    # 10% a scraped "best" tweet, 10% a scraped "recent" tweet.
    #
    # @raise [StandardError] when generation fails or nothing could be scraped
    def tweet
      random_value = rand
      if random_value < 0.8
        # tweet using markov
        result = build_tweet
        Twitter.update(result)
        puts "tweet(markov): #{result}"
      elsif random_value < 0.9
        # tweet using favstar-best
        result = sample_scraped(get_best_tweets).remove_uri
        Twitter.update(result)
        puts "tweet(best): #{result}"
      else
        # tweet using favstar-recent
        result = sample_scraped(get_favorited_tweets).remove_uri
        Twitter.update(result)
        puts "tweet(recent): #{result}"
      end
    end

    private

    # Downloads a favstar page and extracts the text of every tweet paragraph on it.
    def scrape_tweets(url)
      response = http_query('get', url, {})
      matches = response.body.scan(/<p class='fs-tweet-text'>(.*?)<\/p>/m)
      matches.flatten.map { |match| match.stringify }
    end

    # Picks a random scraped tweet, failing with a clear message instead of a
    # NoMethodError on nil when the page yielded nothing.
    def sample_scraped(tweets)
      raise StandardError, 'no tweets could be scraped' if tweets.empty?
      tweets.sample
    end
  end

  # Tokenizer wrapper: frames the tagger's word list with the chain delimiters.
  class Splitter
    # Creates the Igo tagger from the dictionary directory named by IGO_DIC_DIRECTORY.
    def initialize
      @tagger = Igo::Tagger.new(IGO_DIC_DIRECTORY)
    end

    # @param str [String] text handed to the tagger's wakati method
    # @return [Array] BEGIN_DELIMITER, then the tagger's tokens, then END_DELIMITER
    def split(str)
      ([BEGIN_DELIMITER] + @tagger.wakati(str)).push(END_DELIMITER)
    end
  end

  # Second-order Markov chain stored as a flat list of consecutive word triples.
  class Markov
    def initialize
      @table = []
    end

    # Records every run of three consecutive words.
    # Sequences shorter than three words contribute nothing.
    #
    # @param words [Array] delimiter-framed word list
    def study(words)
      words.each_cons(3) { |triple| @table << triple }
    end

    # @param key [Object] first word of a triple
    # @return [Object, nil] a random second word seen after key, or nil if there is none
    def search1(key)
      followers = @table.select { |row| row[0] == key }.map { |row| row[1] }
      followers.sample
    end

    # @param key1 [Object] first word of a triple
    # @param key2 [Object] second word of a triple
    # @return [Object, nil] a random third word seen after the pair, or nil if there is none
    def search2(key1, key2)
      followers = @table.select { |row| row[0] == key1 && row[1] == key2 }.map { |row| row[2] }
      followers.sample
    end

    # Walks the chain from BEGIN_DELIMITER until END_DELIMITER is reached.
    #
    # @return [Array] the generated words without delimiters; empty when
    #   nothing usable has been studied
    def build
      array = []
      key1 = BEGIN_DELIMITER
      key2 = search1(key1)
      # A nil key means no continuation is known (for instance an empty table).
      # Stop there; otherwise the walk would append nil forever.
      until key2.nil? || key2 == END_DELIMITER
        array << key2
        key1, key2 = key2, search2(key1, key2)
      end
      array
    end
  end
end
# -*- coding: utf-8 -*-

require_relative 'twitterbot'

# Credentials for the Twitter client; replace the placeholder strings with real values.
Twitter.configure do |config|
  config.consumer_key = 'consumer key'
  config.consumer_secret = 'consumer secret'
  config.oauth_token = 'oauth token'
  config.oauth_token_secret = 'oauth token secret'
end

# Run one study / reply / tweet cycle, starting over with a fresh bot after a
# failure, for at most 10 attempts.
# Pass --no-reply or --no-tweet on the command line to skip that step.
10.times do
  begin
    bot = TwitterBot::Crawler.new('BOTのscreen_name', '取得元のscreen_name')
    bot.study
    bot.reply_to_mentions unless ARGV.include?("--no-reply")
    bot.tweet unless ARGV.include?("--no-tweet")
    break
  rescue StandardError => e
    # Only StandardError is retried: Interrupt (Ctrl-C) and SystemExit must
    # still terminate the script instead of triggering another attempt.
    puts e
  end
end