Simple Express API for fetching a website's meta data
var express = require('express')
// Stand-alone Express app used as a local fixture: it serves a tiny
// page and a fake favicon that can be requested while testing
var testServer = express()

// Test page with a known <title>; the markup is kept minimal but
// well-formed so an HTML parser sees a properly closed document
testServer.get('/test', function(req, res) {
  res.send("<html><title>test</title></html>")
})

// Fake a favicon for testing; the body is only placeholder text
testServer.get('/favicon.ico', function(req, res) {
  res.send("i am an icon!")
})

// Start it up on port 5555
testServer.listen(5555)
{
"name": "metapi",
"version": "0.1.0",
"description": "Web service for getting a website's meta information",
"main": "api.js",
"dependencies": {
"cheerio": "^0.19.0",
"express": "^4.13.3",
"unirest": "^0.4.2"
},
"devDependencies": {},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+ssh://git@gist.github.com/b78e1dc7874d6274e465.git"
},
"keywords": [
"meta",
"api",
"website",
"url",
"domain",
"description",
"data"
],
"author": "Montana Flynn",
"license": "ISC",
"bugs": {
"url": "https://gist.github.com/b78e1dc7874d6274e465"
},
"homepage": "https://gist.github.com/b78e1dc7874d6274e465"
}
// To send the HTTP(s) requests
var unirest = require('unirest')
// To parse the response body
var cheerio = require('cheerio')
/**
 * Fetch a web page and extract its meta data.
 *
 * @param {string} website - URL of the page; passed straight to unirest.get.
 * @param {function} callback - Invoked once per call.
 *   On success: callback(data), where data carries title, keywords,
 *   description, domain and, when one could be found, favicon.
 *   On failure (request not ok, or no body): callback(null, res), where
 *   res is the unirest response object.
 */
function getMetaData(website, callback) {
  // Send an http request to the website
  unirest.get(website).end(function(res) {
    // Bail out on failed requests and on responses without a body.
    // The missing-body check comes first so reading .length can never
    // throw on an undefined body
    if (!res.ok || !res.body || res.body.length === 0) {
      callback(null, res)
      return
    }

    // Load the body into cheerio, which is like a server-side jQuery
    var $ = cheerio.load(res.body)
    var uri = res.request.uri

    // Collect the fields one statement at a time
    var data = {}
    data["title"] = $("title").text()
    data["keywords"] = $("meta[name='keywords']").attr("content")
    data["description"] = $("meta[name='description']").attr("content")
    data["domain"] = uri.host

    // Prefer the icon the page itself declares
    var iconHref = $("link[rel='icon']").attr("href")
    if (iconHref) {
      data["favicon"] = iconHref
      callback(data)
      return
    }

    // Otherwise try the default favicon url browsers check
    var faviconUrl = uri.protocol + "//" + uri.host + "/favicon.ico"
    unirest.get(faviconUrl).end(function(iconRes) {
      // Only report the favicon when the probe came back ok
      if (iconRes.ok) {
        data["favicon"] = faviconUrl
      }
      // Send back the data via the callback either way
      callback(data)
    })
  })
}
// Export the function so it can be require'd
module.exports = getMetaData
// API stuff like routing, etc...
var express = require('express')
// To get the meta data, custom
var getMetaData = require("./meta.js")
// Create the express api, register its single route and start listening.
// GET /?website=<url> answers with the meta data for that url as JSON
express().get('/', function(req, res) {
  // Get the query parameter url
  var website = req.query.website

  // Send missing param error back to client and stop here; carrying on
  // would look up `undefined` and then attempt a second response on a
  // request that has already been answered
  if (!website) {
    var errMsg = {error: "Missing website parameter"}
    // NOTE(review): 400 Bad Request would describe a missing parameter
    // better than 401; status kept as-is so existing clients are unaffected
    res.status(401).json(errMsg)
    return
  }

  // Get the data using a callback function
  getMetaData(website, function(data, err) {
    if (err) {
      // Attach the incoming request to the error for debugging,
      // log both, then return an internal server error
      err.request = req
      console.log(err)
      res.status(500).end()
      return
    }

    // Send proper JSON response
    res.json(data)
  })
// Listen on port 4444
}).listen(4444)