// Author: donfanning
// Date: 8/15/2018 - 12:02 PM
//
// sitecopy.js — crawl a site into /tmp and sync it to S3 (AWS Lambda handler).

var s3 = require('s3');
var AWS = require('aws-sdk');
var async = require('async');

// S3 sync client; caps concurrent S3 operations at 20.
var syncClient = s3.createClient({
    maxAsyncS3: 20,
});

// Local scratch directory for the crawled site (Lambda's writable tmp space).
// `var` added: these were implicit globals (a ReferenceError in strict mode).
var tmpDir = "/tmp/sitecopy";
// Destination S3 bucket the crawled site is synced into.
var dstBucket = "aemtos3test";



function copySite(domain, url, context) {
	 async.waterfall([
	 	function downloadSite (next) {
	 		var fs = require("node-fs"),
		      url = require("url"),
		      path = require("path"),
		      Crawler = require("simplecrawler").Crawler;

		    var myCrawler = new Crawler(domain);
		    myCrawler.interval = 250;
		    myCrawler.maxConcurrency = 5;

		    myCrawler.on("fetchcomplete", function(queueItem, responseBuffer, response) {

		        // Parse url
		        var parsed = url.parse(queueItem.url);
		        // Rename / to index.html
		        if (parsed.pathname === "/") {
		            parsed.pathname = "/index.html";
		        }
		        // Where to save downloaded data path.join(tmpDir + __dirname, domain);
		        var outputDirectory = tmpDir;

		        // Get directory name in order to create any nested dirs
		        var dirname = outputDirectory + parsed.pathname.replace(/\/[^\/]+$/, "");

		        // Path to save file
		        var filepath = outputDirectory + parsed.pathname;
		        console.log ("Filepath " + filepath);

		        // Check if DIR exists
		        fs.exists(dirname, function(exists) {
		            // If DIR exists, write file
		            if (exists) {
		                fs.writeFile(filepath, responseBuffer, function() {});
		            } else {
		                // Else, recursively create dir using node-fs, then write file
		                fs.mkdir(dirname, 0755, true, function() {
		                    fs.writeFile(filepath, responseBuffer, function() {});
		                });
		            }
		        });
		        console.log("I just received %s (%d bytes)", queueItem.url, responseBuffer.length);
		        console.log("It was a resource of type %s", response.headers["content-type"]);
		    });

		    // Fire callback
		    myCrawler.on("complete", function() {
		        next(null);
		    });

		    // Start Crawl
		    myCrawler.start();

		},
		function upload(next) {
	        var params = {
	            localDir: tmpDir,
	            deleteRemoved: true,
	            s3Params: {
	                ACL: 'public-read',
	                Bucket: dstBucket,
	            },
	        };
	        console.log("starting upload");
	        var uploader = syncClient.uploadDir(params);
	        uploader.on('error', function(err) {
	            console.error("unable to sync up:", err.stack);
	            next(err);
	        });
	        uploader.on('progress', function() {
			  	console.log("progress", uploader.progressAmount, uploader.progressTotal);
			});
	        uploader.on('end', function() {
	            console.log("done uploading");
	            next(null);
	        });
 		}
	], function(err) {
        if (err) console.error("Failure because of: " + err)
        else console.log("All methods in waterfall succeeded.");

        context.done();
    });
}

exports.handler = function(event, context) {
    // Read options from the event.
    console.log("Reading options from event:\n", event);
    var site = event.site;
    var path = event.path;
    copySite(site, site + path, context);
};