poros
7/21/2015 - 10:46 PM

Extract ALL status updates from wall.htm in your downloaded copy of your Facebook data

Extract ALL status updates from wall.htm in your downloaded copy of your Facebook data

import codecs
import sys

from bs4 import BeautifulSoup


def main():
    """Dump the text of every ``class="comment"`` element in an HTML file.

    Command-line arguments:
        sys.argv[1]: path of the HTML file to read (e.g. ``wall.htm``).
        sys.argv[2]: path of the UTF-8 text file to write.

    Each extracted text is written followed by a ``#######`` separator line.
    Exits with a usage message if fewer than two arguments are supplied.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: %s WALL_HTM OUTPUT_FILE" % sys.argv[0])

    # Read raw bytes so BeautifulSoup can detect the document encoding itself
    # instead of relying on the platform's default text encoding.
    with open(sys.argv[1], "rb") as wall_file:
        wall = BeautifulSoup(wall_file, 'html.parser')

    with codecs.open(sys.argv[2], "w", "utf-8") as out_file:
        for div in wall.find_all(class_="comment"):
            # ``div.string`` is None when the element has more than one child
            # (line breaks, links, ...), which would write the text "None";
            # ``get_text()`` returns the complete text in every case.
            out_file.write("%s\n#######\n" % div.get_text())


if __name__ == "__main__":
    main()