retrography
8/20/2015 - 11:34 PM

Importing Yelp's Academic Database into MongoDB

Importing Yelp's Academic Database into MongoDB

# Pull every record of one type out of the line-delimited Yelp dump and wrap
# the matches in a single JSON array, one object per line.
#   $1 - fixed text that identifies the record type on a line
#   $2 - output file
# The pipeline joins the matching lines with commas, drops the trailing comma,
# adds the surrounding brackets, then puts each object back on its own line.
# ghead/gsed are the GNU head/sed binaries as installed by Homebrew on macOS.
extract_records() {
  grep "$1" yelp_academic_dataset.json \
    | tr "\n" , \
    | ghead -c -1 \
    | gsed -e "s/^/[\n/" -e "s/$/\n]\n/" -e "s/},{/},\n{/g" \
    > "$2"
}

extract_records ', "type": "business"}' business.json
extract_records ', "type": "user"}' user.json
extract_records ', "type": "review", ' review.json

# Convert each per-type JSON array into a CSV file with the same base name.
for name in business user review; do
  json2csv "${name}.json" "${name}.csv"
done
require "mongo"
require 'json'

# Stream the newline-delimited Yelp dump into MongoDB, routing every record to
# the collection named by its "type" field (the field itself is removed from
# the stored document).
#
# The dump is read line by line with File.foreach instead of File.readlines so
# the whole file is never held in memory, and documents are written in batches
# with insert_many rather than one round trip per record.
BATCH_SIZE = 1_000

cl = Mongo::Client.new('mongodb://192.168.99.100:27017/yelp')

begin
  File.foreach('yelp_academic_dataset.json').each_slice(BATCH_SIZE) do |lines|
    # Blank lines (e.g. a trailing newline at end of file) are skipped instead
    # of being handed to the JSON parser, which would raise on them.
    docs = lines.reject { |line| line.strip.empty? }.map { |line| JSON.parse(line) }

    # group_by preserves the file order of the documents within each type, so
    # every collection still receives its records in their original order.
    docs.group_by { |doc| doc.delete('type') }.each do |type, batch|
      cl[type].insert_many(batch)
    end
  end
ensure
  # Release the driver's connections even if parsing or an insert fails.
  cl.close
end
# One-time setup (left commented out): install Docker Toolbox and create the
# VirtualBox-backed docker-machine VM named "default".
# brew cask install dockertoolbox
# docker-machine create --driver virtualbox default

# Print the environment variables that point the docker CLI at the "default"
# machine, then export them into the current shell.
docker-machine env default
eval "$(docker-machine env default)"

# Create (without starting) a container named "mongodata" from the mongo image
# with a /data volume; it serves only as a data-volume holder.
docker create --volume /data --name mongodata mongo

# Start MongoDB from the default mongo image in the background, publish port
# 27017, reuse the volumes of "mongodata", and select the WiredTiger engine.
docker run \
  --name mongodb \
  --detach \
  --publish 27017:27017 \
  --volumes-from mongodata \
  mongo --storageEngine=wiredTiger