#!/bin/bash
#
# Setting up Wukong-Hadoop on EMR, bootstrap script. (Wukong 3.0)
#
# Installs, in order:
#   1. build tools and development libraries (apt)
#   2. rbenv into /usr/local/rbenv, plus a login profile snippet for it
#   3. ruby-build
#   4. Ruby 1.9.3-p194 into /usr/local/
#   5. the bundler, gorillib and wukong-hadoop gems
#
# Every privileged step goes through sudo, so the invoking user needs sudo
# rights without a password prompt (the script is meant to run unattended).

# Stop at the first failing step instead of carrying on with a half-provisioned
# node; a non-zero exit status makes the failure visible to whatever launched
# the bootstrap.
set -euo pipefail

readonly RBENV_INSTALL_DIR=/usr/local/rbenv
readonly RBENV_PROFILE=/etc/profile.d/rbenv.sh
readonly TARGET_RUBY=1.9.3-p194
readonly RUBY_PREFIX=/usr/local/

# Update, upgrade and install development tools.
# DEBIAN_FRONTEND=noninteractive keeps package configuration prompts from
# blocking an unattended run; it is passed via env(1) so it survives sudo's
# environment reset.
apt_packages=(
  build-essential git-core curl libssl-dev
  libreadline5 libreadline5-dev
  zlib1g zlib1g-dev
  libmysqlclient-dev
  libcurl4-openssl-dev
  libxslt-dev libxml2-dev
)
sudo apt-get update
sudo env DEBIAN_FRONTEND=noninteractive apt-get -y upgrade
sudo env DEBIAN_FRONTEND=noninteractive apt-get -y install "${apt_packages[@]}"

# Install rbenv.
# Cloned over https: GitHub no longer serves the unauthenticated git://
# protocol. The clone is skipped when a checkout is already present so the
# script can be re-run.
if [[ ! -d "${RBENV_INSTALL_DIR}/.git" ]]; then
  sudo git clone https://github.com/rbenv/rbenv.git "${RBENV_INSTALL_DIR}"
fi
sudo mkdir -p "${RBENV_INSTALL_DIR}/shims" "${RBENV_INSTALL_DIR}/versions"

# Add rbenv to the path for login shells.
# The file is written as root through tee and left at mode 644: it is sourced
# by every login shell, so it must not be writable by unprivileged users.
# The quoted 'EOF' delimiter keeps $RBENV_ROOT, $PATH and $(...) literal.
sudo tee "${RBENV_PROFILE}" >/dev/null <<'EOF'
# rbenv setup
export RBENV_ROOT=/usr/local/rbenv
export PATH="$RBENV_ROOT/bin:$PATH"
eval "$(rbenv init -)"
EOF
sudo chmod 644 "${RBENV_PROFILE}"

# Load rbenv into this shell as well. Strict mode is relaxed around the
# source because the code emitted by `rbenv init -` is not under our control
# and may contain steps that fail for a non-root user (the rbenv directories
# are root-owned) or touch unset variables.
set +eu
# shellcheck disable=SC1090
source "${RBENV_PROFILE}"
set -eu

# Install ruby-build from a private scratch directory rather than a
# predictable path under /tmp; the directory is removed on any exit path.
build_dir=$(mktemp -d)
trap 'rm -rf -- "${build_dir:?}"' EXIT
git clone --depth 1 https://github.com/rbenv/ruby-build.git "${build_dir}/ruby-build"
# Subshell so the working directory of the main script is left untouched.
(
  cd "${build_dir}/ruby-build"
  sudo ./install.sh
)

# Install Ruby 1.9.3-p194.
sudo ruby-build "${TARGET_RUBY}" "${RUBY_PREFIX}"

# Production installing gems skipping ri and rdoc.
for gem_name in bundler gorillib wukong-hadoop; do
  sudo gem install "${gem_name}" --no-rdoc --no-ri
done
# EMR INPUTS
# input: {s3_bucket}/input/data_input_folder
# output: {s3_bucket}/output/data_output_folder
# mapper: wu-local s3://{s3_bucket}/scripts/mapper.rb --run=mapper # wukong 3.0 script
# reducer: wu-local s3://{s3_bucket}/scripts/reducer.rb --run=reducer # wukong 3.0 script
# bootstrap: s3://{s3_bucket}/scripts/wukong_setup.sh # This gist