Script that allows batch-downloading a person's full Facebook photo collection.

#!/usr/bin/env python
# encoding=utf-8
#
# Script that allows batch-downloading a person's full Facebook photo
# collection if the person is you or if you are friends with that person
# and have permission to see them.
#
# BEFORE YOU USE THIS:
#     pytz must be installed.
#
#     Make sure that `TIMEZONE`, `TOKEN`, `USER_ID`, and `DATE_FILTER` are
#     set the way you want them -- see below.
#
#     Then simply execute this script.
#

import json
import os
from urllib2 import build_opener, HTTPSHandler
from urllib import urlencode, quote
from datetime import datetime
from pytz import utc, timezone

# Change this to the time zone you want the resulting timestamps to be displayed in
TIMEZONE = timezone("US/Eastern")

# Your OAuth access token
# If you need a token, see `README.mdown` in this gist
TOKEN = ""

# User ID of the person whose albums you want to download
USER_ID = "" # can be a FB profile "username" (URL alias) or ID number

# If you want to only download albums that have been updated since a certain date.
#DATE_FILTER = datetime(2010,12,1)
DATE_FILTER = None

# =========================================================================

PROFILE_URL = "https://graph.facebook.com/%s/albums/" % USER_ID
ALBUM_URL = "https://graph.facebook.com/%d/photos/"

PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))

def do_album_download():
    # Custom urllib2 opener since we're going to be making HTTPS requests.
    opener = build_opener(HTTPSHandler)
    
    # Output goes to:  ./photos_for_$USERID
    MAINDIR = os.path.join(PROJECT_ROOT, "photos_for_%s" % USER_ID)
    if not (os.path.exists(MAINDIR) and os.path.isdir(MAINDIR)):
        os.makedirs(MAINDIR)
    
    # Open the Graph API URL for the user's albums.
    u = opener.open(PROFILE_URL+"?"+urlencode({
        'access_token':TOKEN
    }))
    profile_data = json.loads(u.read())
    u.close()
    
    # Pull out the `data` portion since that's where all album information comes from.
    album_set = profile_data['data']
    
    # Since the Graph API paginates results, keep following the "next" link until no pages remain.
    temp_data = profile_data.copy()
    while temp_data.get("paging", {"next":None}).get("next", None):
        temp_u = opener.open(temp_data['paging']['next'])
        temp_data = json.loads(temp_u.read())
        temp_u.close()
        album_set.extend(temp_data['data'])
    
    # Timestamps come back as UTC with a trailing "+0000" offset;
    # strip the offset ([:-5]) and localize the naive datetime as UTC.
    for album in album_set:
        album['adj_time'] = utc.localize(datetime.strptime(album['updated_time'][:-5], "%Y-%m-%dT%H:%M:%S"))
    
    # If we have a DATE_FILTER, make sure we filter against that.
    if DATE_FILTER:
        date_filter = utc.localize(DATE_FILTER)
        album_set = filter(
            lambda item: item['adj_time'] >= date_filter,
            album_set
        )
    
    print
    print "Downloading %d albums..." % len(album_set)
    print
    
    # Counters that we can display at the end of the process
    total_albums = len(album_set)
    total_photos = 0
    
    
    # =====
    # Write out an index file for the root output directory.
    # Just contains a list of the albums we're going to download and links to the indexes
    # of the resulting album subdirectories.
    info_html = open(os.path.join(MAINDIR, "index.html"), "w")
    info_html.write(u"""<!doctype html>\n<html lang="en">\n<head>\n<meta charset="utf-8">\n<title>photos</title>\n<style type="text/css">img{max-width:100px;max-height:100px}</style>\n</head>\n\n<body>\n<h1>photos</h1>\n""")
    info_html.write("<ul>\n")
    for album in album_set:
        album_name = album['name'].encode("ascii", "xmlcharrefreplace")
        
        album_path = quote("%s - %s" % (album['id'], album['name'].encode('ascii', 'ignore')))
        info_html.write('<li><a href="%s/index.html">%s</a>: updated %s</li>' % (album_path, album_name, album['adj_time'].strftime("%b. %d, %Y")))
    info_html.write("\n</ul>\n</body>\n</html>")
    info_html.close()
    # =====
    
    
    # Go!
    for album in album_set:
        print
        print "Album: %s" % album['id']
        
        # Turn possible unicode into HTML-safe album name.
        album_name = album['name'].encode("ascii", "xmlcharrefreplace")
        
        # Make subdirectory for this album
        THISDIR = os.path.join(MAINDIR, "%s - %s" % (album['id'], album['name'].encode('ascii', 'ignore')))
        if not (os.path.exists(THISDIR) and os.path.isdir(THISDIR)):
            os.makedirs(THISDIR)
        
        # Get album from Graph API.
        album_u = opener.open(ALBUM_URL % int(album['id'])+"?"+urlencode({
            'access_token':TOKEN
        }))
        album_str = album_u.read()
        album_u.close()
        
        # Write this json out to a file in case we want to later parse out more of the metadata.
        album_json_file = open(os.path.join(THISDIR, "albumdata-00.json"), "w")
        album_json_file.write(album_str)
        album_json_file.close()
        
        # Parse out the set of photos.
        album_data = json.loads(album_str)
        photo_set = album_data['data']
        
        # Like above, we have to make sure we aggregate all paginated data.
        pagenum = 0
        temp_data = album_data.copy()
        while temp_data.get("paging", {"next":None}).get("next", None):
            pagenum += 1
            
            # Request next page
            temp_u = opener.open(temp_data['paging']['next'])
            album_str = temp_u.read()
            temp_u.close()
            
            # Write out this page's json
            album_json_file = open(os.path.join(THISDIR, "albumdata-%02d.json" % pagenum), "w")
            album_json_file.write(album_str)
            album_json_file.close()
            
            # Append photos from this page
            temp_data = json.loads(album_str)
            photo_set.extend(temp_data['data'])
        
        
        print "%d photos" % len(photo_set)
        total_photos += len(photo_set)
        
        # =====
        # Write out an HTML index for this album.
        info_html = open(os.path.join(THISDIR, "index.html"), "w")
        info_html.write(u"""<!doctype html>\n<html lang="en">\n<head>\n<meta charset="utf-8">\n<title>%s</title>\n<style type="text/css">img{max-width:100px;max-height:100px}</style>\n</head>\n\n<body>\n<h1>%s</h1>\n<h2>%d photos</h2>""" % (album_name, album_name, len(photo_set)))
        
        # Write out HTML for each photo in this album.
        for photo in photo_set:
            # Pull together comments on this photo.
            comment_str = u"<ul>"
            for comment in photo.get("comments", {'data':[]})['data']:
                t = utc.localize(datetime.strptime(comment['created_time'][:-5], "%Y-%m-%dT%H:%M:%S"))
                t = t.astimezone(TIMEZONE).strftime("%b. %d, %Y %I:%M:%S %p %Z")
                comment_str += u"\n   <li>%s (%s): %s</li>" % (comment['from'].get("name", "(Private)"), t, comment['message'])
            comment_str += u"</ul>"
            
            # Pull together tags for this photo.
            tagged_people = []
            for person in photo.get("tags", {'data':[]})['data']:
                tagged_people.append(person.get("name", "(Private)"))
            tag_str = u", ".join(tagged_people)
            tag_str = u"Tagged: %s" % tag_str.encode('ascii', "xmlcharrefreplace")
            
            # Make the caption HTML-safe.
            caption = photo.get("name","").encode("ascii", "xmlcharrefreplace").replace("\n","<br />\n")
            
            # Localize the time
            t = utc.localize(datetime.strptime(photo['created_time'][:-5], "%Y-%m-%dT%H:%M:%S"))
            t = t.astimezone(TIMEZONE).strftime("%b. %d, %Y %I:%M:%S %p %Z")
            
            # Write this photo out to the HTML file
            info_html.write(u'<p><a href="%s.jpg"><img src="%s.jpg"/><br />%s</a><br />%s<br />Uploaded: %s</p>\n%s\n<hr />\n\n'% (
                photo['id'], photo['id'], caption, tag_str, t, comment_str.encode("ascii", "xmlcharrefreplace")
            ))
        info_html.write("\n\n</body>\n</html>")
        info_html.close()
        
        # =====
        
        # Actually download the photos in this album.
        for photo in photo_set:
            print u"\t"+photo['id']
            photo_u = opener.open(photo['source'])
            photo_file = open(os.path.join(THISDIR, "%d.jpg" % int(photo['id'])), "wb")
            photo_file.write(photo_u.read())
            photo_u.close()
            photo_file.close()
    
    print "%d total albums" % total_albums
    print "%d total photos" % total_photos

if __name__ == '__main__':
    do_album_download()

Script that allows batch-downloading a person's full Facebook photo collection if you are that person, or if you are friends with that person and have permission to see their photos.

## Dependencies

* **pytz**: use `easy_install pytz` or `pip install pytz`

## Before you start

1. Read the Facebook [Terms of Service][tos] and the [Developers Policy][policy]. Know that using this for purposes other than to back up your own photo album data is likely less than kosher. I take no responsibility for any damages caused by your use of this example code.

2. See the LICENSE.txt file attached to this gist.

## The USER_ID

Depending on the person's profile URL, the `USER_ID` can be a "username" or a raw ID number; the Graph API accepts either (see the sketch after the examples):

* http://www.facebook.com/mike.tigas -> **mike.tigas**
* http://www.facebook.com/profile.php?id=15921791 -> **15921791**
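
If you want to confirm what an alias resolves to, you can fetch the user object from the Graph API directly. A minimal sketch using the same `urllib2` plumbing as the script (`resolve_user` is a name used here for illustration, not part of the script):

    import json
    from urllib2 import build_opener, HTTPSHandler
    from urllib import urlencode

    def resolve_user(user_id_or_alias, token):
        # Fetch the Graph API object for a username or numeric ID.
        # data['id'] is the numeric ID either way, which is why both
        # forms are interchangeable as USER_ID.
        opener = build_opener(HTTPSHandler)
        u = opener.open("https://graph.facebook.com/%s?%s" % (
            user_id_or_alias, urlencode({'access_token': token})))
        data = json.loads(u.read())
        u.close()
        return data

    # Both of these should return the same numeric ID:
    #   resolve_user("mike.tigas", TOKEN)["id"]  -> "15921791"
    #   resolve_user("15921791", TOKEN)["id"]    -> "15921791"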

## How to get an access token with the proper permissions for this script

1. [Create a dummy Facebook app][createapp]. It doesn't have to be called anything fancy.

2. Copy the URL below, and replace `$CLIENT_ID` with your app ID. Browse to the URL and give your application access to your profile data.

        https://graph.facebook.com/oauth/authorize?client_id=$CLIENT_ID&redirect_uri=http://www.facebook.com/connect/login_success.html&type=user_agent&display=popup&scope=user_photos,friends_photos

3. On the resulting success page, pull out the URL. Get the value for `access_token` and [urldecode][urldecode] it (or use the sketch after this list).

4. The resulting value is your `TOKEN`.

5. You might want to look at the [authentication documentation][authdoc] if you're having trouble.
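
Steps 3 and 4 can also be done in a couple of lines of Python. A minimal sketch, assuming the token comes back in the URL fragment (as it does in the `type=user_agent` flow above); `token_from_success_url` is a name used here for illustration:

    from urlparse import urlparse, parse_qs

    def token_from_success_url(url):
        # The token arrives in the URL fragment, e.g.:
        # http://www.facebook.com/connect/login_success.html#access_token=...&expires_in=...
        fragment = urlparse(url).fragment
        # parse_qs urldecodes the values for you, so the result is
        # ready to paste into TOKEN at the top of the script.
        return parse_qs(fragment)["access_token"][0]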


[tos]: https://www.facebook.com/terms.php
[policy]: https://developers.facebook.com/policy/
[createapp]: https://www.facebook.com/developers/createapp.php
[urldecode]: http://meyerweb.com/eric/tools/dencoder/
[authdoc]: https://developers.facebook.com/docs/authentication/
Copyright 2011 Mike Tigas. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice, this list of
      conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright notice, this list
      of conditions and the following disclaimer in the documentation and/or other materials
      provided with the distribution.

THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those of the
authors and should not be interpreted as representing official policies, either expressed
or implied, of the author or any contributors.