NESTicle
9/13/2015 - 10:05 PM

A simple Express API that returns a website's meta data (title, keywords, description, domain and favicon)

Simple express api to get website meta data

var express = require('express')

// Local test site used to exercise the meta data API:
// it serves one page with a <title> and a fake favicon.

// Create the express app so we can attach routes
// and a listener port address
var testServer = express()

// Serve a minimal, well-formed test page that has a title
testServer.get('/test', function(req, res) {
    res.send("<html><title>test</title></html>")
})

// Fake a favicon for testing
testServer.get('/favicon.ico', function(req, res) {
    res.send("i am an icon!")
})

// Start it up on port 5555
testServer.listen(5555)
{
  "name": "metapi",
  "version": "0.1.0",
  "description": "Web service for getting a website's meta information",
  "main": "api.js",
  "dependencies": {
    "cheerio": "^0.19.0",
    "express": "^4.13.3",
    "unirest": "^0.4.2"
  },
  "devDependencies": {},
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+ssh://git@gist.github.com/b78e1dc7874d6274e465.git"
  },
  "keywords": [
    "meta",
    "api",
    "website",
    "url",
    "domain",
    "description",
    "data"
  ],
  "author": "Montana Flynn",
  "license": "ISC",
  "bugs": {
    "url": "https://gist.github.com/b78e1dc7874d6274e465"
  },
  "homepage": "https://gist.github.com/b78e1dc7874d6274e465"
}
// To send the HTTP(s) requests
var unirest = require('unirest')

// To parse the response body
var cheerio = require('cheerio')

// Get meta data from a website and pass it to the callback.
//
// website  - URL of the page to fetch
// callback - called as callback(data, err):
//              on success: callback(data) where data holds title,
//                          keywords, description, domain and, when one
//                          could be found, favicon
//              on failure: callback(null, res) where res is the
//                          unirest response that was not usable
function getMetaData(website, callback) {

    // Send an http request to the website
    unirest.get(website).end(function(res) {

        // We had a problem getting the website, or it came back
        // without a body (missing, null or empty)
        if (!res.ok || !res.body) {
            callback(null, res)
            return
        }

        // Load the body into cheerio which
        // is like a server-side jQuery
        var $ = cheerio.load(res.body)
        var uri = res.request.uri

        // Create data; keywords and description stay undefined
        // when the page has no matching meta element
        var data = {}
        data["title"] = $("title").text()
        data["keywords"] = $("meta[name='keywords']").attr("content")
        data["description"] = $("meta[name='description']").attr("content")
        data["domain"] = uri.host

        // If we got an icon link element in the page use its href as is
        var iconHref = $("link[rel='icon']").attr("href")
        if (iconHref) {
            data["favicon"] = iconHref
            callback(data)
            return
        }

        // Otherwise try the default favicon url browsers check
        var faviconUrl = uri.protocol + "//" + uri.host + "/favicon.ico"
        unirest.get(faviconUrl).end(function(iconRes) {

            // Only report the favicon if the request succeeded
            if (iconRes.ok) {
                data["favicon"] = faviconUrl
            }

            // Send back the data to express via the callback
            callback(data)
        })
    })
}

// Export the function so other modules can load it with require()
// and call it as getMetaData(website, callback)
module.exports = getMetaData
// API stuff like routing, etc...
var express = require('express')

// To get the meta data, custom
var getMetaData = require("./meta.js") 

// Create express api and route.
// GET /?website=<url> responds with the site's meta data as JSON,
// 401 with a JSON error when the parameter is missing, or an empty
// 500 when the meta data could not be fetched.
express().get('/', function(req, res) {

    // Get the query parameter url
    var website = req.query.website

    // Send missing param error back to client and stop here;
    // without the return we would go on to fetch an undefined url
    // and try to answer the same request a second time
    if (!website) {
        var errMsg = {error: "Missing website parameter"}
        res.status(401).json(errMsg)
        return
    }

    // Get the data using a callback function
    getMetaData(website, function(data, err) {

        // Send error back to client
        if (err) {

            // Add more data for debugging
            err.request = req

            // Log error and request info
            console.log(err)

            // Return an internal server error
            res.status(500).end()
            return
        }

        // Send proper JSON response
        res.json(data)
    })

// Listen on port 4444
}).listen(4444)