oinak
3/23/2018 - 6:15 AM

A Ruby script for collecting phone record statistics from a Facebook user data dump

A Ruby script for collecting phone record statistics from a Facebook user data dump

#! /usr/bin/env ruby

# This script can be used to parse and dump the information from
# the 'html/contact_info.htm' file in a Facebook user data ZIP download.
#
# It dumps all cell phone call + SMS message + MMS records, plus a summary of each.
#
# Place this script inside the extracted Facebook data download folder
# alongside the 'html' folder.
#
# This script requires Ruby plus the Nokogiri and byebug gems to be installed
# (both are loaded by the `require` lines below).

require 'nokogiri'
require 'byebug'
require 'time'

# Prints a horizontal rule (24 dashes followed by a newline) to standard output.
def hr
  rule = '-' * 24
  $stdout.puts(rule)
end

# Writes the leading whitespace for the given nesting level to standard output
# (three spaces per level beyond the first) without a trailing newline.
#
# Returns the result of $stdout.flush, which callers rely on being truthy so
# that they can chain `indent(n) and <print something>`.
def indent(level = 1)
  padding = '   ' * (level - 1)
  $stdout.print(padding)
  $stdout.flush
end

# Prints a titled section: a rule, the title and a blank line, then whatever
# the given block prints, then a closing rule and a blank line. Every header
# and footer line is prefixed with the indentation for +level+.
def section(title, level: 1)
  # Header
  indent(level)
  hr
  indent(level)
  $stdout.puts(title)
  indent(level)
  $stdout.puts

  yield

  # Footer
  indent(level)
  hr
  indent(level)
  $stdout.puts
end

# Extracts metadata from a call/text/sms/mms table.
# The first <tr> is skipped (it is treated as the header row); every remaining
# row becomes a list of its <td> texts with the trailing newline removed.
#
# Returns nil if the table is nil or has no rows after the header.
# Returns a 2d list of rows/columns otherwise.
def extract_table_metadata(metadata_table)
  return nil if metadata_table.nil?

  # `drop(1)` yields an empty list for a table with no rows at all, whereas
  # slicing with `[1..-1]` would yield nil and crash on the emptiness check.
  records = metadata_table.css('tr').drop(1)
  return nil if records.empty? # many tables are empty.

  records.map do |record|
    record.css('td').map { |cell| cell.text.chomp }
  end
end

# Collects the record rows found inside a history container.
#
# container - the node whose direct <table> children are scanned (presumably
#             one per contact / phone number), or nil when that kind of
#             history is absent from the document.
#
# Returns a list with one entry per child table that has records; each entry
# is the 2d row/column list produced by extract_table_metadata. Tables without
# records are left out.
def dig_out_metadata(container:)
  # If a specific type of metadata is missing (calls, texts, ..), the
  # container div will simply not be present.
  return [] if container.nil?

  contact_tables = container.children.select { |child| child.name == "table" }

  contact_tables.map do |contact_table|
    metadata_table = contact_table.css('table').first
    # A contact table without a nested records table has nothing to extract;
    # skip it instead of handing nil to extract_table_metadata.
    extract_table_metadata(metadata_table) if metadata_table
  end.compact
end

# Prints every record group in +metadata+ inside a section titled
# +metadata_title+. Each group gets its own nested "Another Phone Number"
# section containing all of the group's cells joined by ", " on one line.
#
# metadata       - list of record groups (nested lists of strings).
# metadata_title - heading of the enclosing section.
def print_metadata(metadata, metadata_title:)
  section(metadata_title) do
    # nil entries carry no records; skip them rather than stopping in an
    # interactive debugger, which would hang a non-interactive run.
    metadata.compact.each do |record|
      section("Another Phone Number", :level => 2) do
        indent(2)
        puts record.join(", ")
      end
    end
  end
end

# Prints the date range covered by the records in +metadata+.
#
# metadata      - list with one entry per phone number, each entry being a
#                 list of rows (lists of cell strings), as returned by
#                 dig_out_metadata. The second cell of every row is parsed
#                 as the record's timestamp.
# metadata_name - singular noun used in the printed sentence.
#
# Prints nothing when no row carries a timestamp. Time.parse raises
# ArgumentError for a cell it cannot interpret.
def print_timestamps(metadata, metadata_name:)
  # Flatten one level so that every row of every phone number is examined;
  # indexing the outer list directly would pick the second *row* of each
  # phone number instead of the second *cell* of each row.
  timestamps = metadata.flatten(1)
    .map { |row| row[1].to_s.strip }
    .reject(&:empty?)
    .map { |text| Time.parse(text) }
  return if timestamps.empty?

  puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}"
end

# Prints how many records there are per status, where a record's status is
# the first cell of its row. Statuses are listed in order of first appearance
# and lower-cased. Prints nothing when there are no rows.
def print_status_breakdown(metadata, metadata_name:)
  counts = Hash.new(0)
  metadata.flatten(1).each { |row| counts[row.first] += 1 }
  return if counts.empty?

  parts = counts.map { |status, count| "#{count} #{status.downcase} #{metadata_name}" }
  puts "This includes #{parts.join(', ')}"
end

# Returns the first <div> sibling that follows the <h2> whose text equals
# +heading+, or nil when the document has no such heading.
def history_container(html_doc, heading)
  html_doc.xpath("//h2[text()='#{heading}']/following-sibling::div")[0]
end

# Prints the record count for one kind of history, followed by its date range
# and its per-status breakdown. Indentation is written before each of the two
# detail lines regardless of whether the helper then prints anything.
def print_history_summary(metadata, singular:, plural:)
  $stdout.puts "There are records of #{metadata.flatten(1).size} distinct #{plural}"
  indent(2)
  print_timestamps(metadata, :metadata_name => singular)
  indent(2)
  print_status_breakdown(metadata, :metadata_name => plural)
end

# Prints the complete report for a parsed contact-info document: the call, SMS
# and MMS records, the list of phone numbers that have stored data, and a
# brief summary of each kind of record.
#
# html_doc - the parsed HTML document; it must respond to `xpath`.
def print_call_history(html_doc)
  # [section heading in the document, singular noun, plural noun]
  kinds = [
    ["Call History", "cell phone call", "cell phone calls"],
    ["SMS History", "SMS message", "SMS messages"],
    ["MMS History", "MMS message", "MMS messages"]
  ]

  histories = kinds.map do |title, singular, plural|
    container = history_container(html_doc, title)
    [title, singular, plural, dig_out_metadata(:container => container)]
  end

  # The expression starts with `//`, so it matches across the whole document
  # whatever node it is evaluated on. Run it on the document itself so the
  # numbers are still found when there is no Call History section.
  phone_numbers = html_doc.xpath("//b[text()='Number:']/following-sibling::text()")
    .map(&:text).sort.uniq

  histories.each do |title, _singular, _plural, metadata|
    print_metadata(metadata, :metadata_title => title)
  end

  section("The full list of phone numbers that have stored data") do
    # Eight numbers per printed line.
    phone_numbers.each_slice(8) do |group|
      indent(2)
      puts group.join(", ")
    end
  end


  $stdout.puts "A brief summary of phone records"
  hr
  $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers"
  histories.each do |_title, singular, plural, metadata|
    print_history_summary(metadata, :singular => singular, :plural => plural)
  end
  hr
end

# Entry point: load the contact info page (path is relative to the current
# working directory), parse it and print the report.
contact_info_path = 'html/contact_info.htm'
html_doc = Nokogiri::HTML(File.read(contact_info_path))

print_call_history(html_doc)