# imnotquitejack
# 2/14/2013 - 8:25 PM
#
# Setting up Wukong-Hadoop on EMR, bootstrap script. (Wukong 3.0)

#!/bin/bash

# Refresh the package index, upgrade what is already installed, then install
# the build toolchain and development headers listed in dev_packages.
dev_packages=(
  build-essential
  git-core
  curl
  libssl-dev
  libreadline5
  libreadline5-dev
  zlib1g
  zlib1g-dev
  libmysqlclient-dev
  libcurl4-openssl-dev
  libxslt-dev
  libxml2-dev
)

sudo apt-get update
sudo apt-get -y upgrade
# -y answers apt's confirmation prompt so the install can run unattended.
sudo apt-get -y install "${dev_packages[@]}"

# Install rbenv system-wide under /usr/local/rbenv.
# -p keeps a re-run from erroring when the directory already exists.
sudo mkdir -p /usr/local/rbenv

# Clone only when there is no checkout yet; git refuses to clone into a
# non-empty directory, so an unconditional clone would fail on a re-run.
if [[ ! -d /usr/local/rbenv/.git ]]; then
  # Use HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
  sudo git clone https://github.com/sstephenson/rbenv.git /usr/local/rbenv
fi

# Add rbenv to the path:
# The snippet is written as root through `sudo tee` instead of opening the file
# up with chmod 777. Files in /etc/profile.d are typically sourced by login
# shells, so a world-writable file there would let any local user inject
# commands into other users' sessions.
# The quoted heredoc delimiter keeps $RBENV_ROOT, $PATH and $(rbenv init -)
# literal; they are expanded later, when the file is sourced.
sudo tee /etc/profile.d/rbenv.sh >/dev/null <<'RBENV_PROFILE'
# rbenv setup
export RBENV_ROOT=/usr/local/rbenv
export PATH="$RBENV_ROOT/bin:$PATH"
eval "$(rbenv init -)"
RBENV_PROFILE

# Readable by everyone, writable only by the owner (root).
sudo chmod 644 /etc/profile.d/rbenv.sh

# Create the shims and versions directories inside the rbenv root, then load
# the rbenv profile snippet into the current shell.
rbenv_home=/usr/local/rbenv
for rbenv_subdir in shims versions; do
  sudo mkdir "${rbenv_home}/${rbenv_subdir}"
done
. /etc/profile.d/rbenv.sh

# Install ruby-build:
# Clone into a private mktemp directory rather than the fixed /tmp/ruby-build
# path. With a fixed path, a leftover or pre-planted directory makes the clone
# fail, and the install.sh then run as root would be whatever was already there.
ruby_build_tmp="$(mktemp -d)" || {
  echo "ruby-build: could not create a temporary directory" >&2
  exit 1
}

# Use HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
if git clone https://github.com/sstephenson/ruby-build.git "${ruby_build_tmp}/ruby-build"; then
  # The subshell confines the cd, so the caller's working directory is
  # unchanged afterwards (this replaces the pushd/popd pair).
  ( cd "${ruby_build_tmp}/ruby-build" && sudo ./install.sh )
  ruby_build_status=$?
else
  ruby_build_status=1
fi

# Remove the checkout whether or not the install succeeded. sudo is used in
# case the root-run installer left root-owned files behind; ${var:?} aborts
# instead of expanding to an empty path.
sudo rm -rf -- "${ruby_build_tmp:?}"

# Stop here on failure: the following steps need the ruby-build command.
if (( ruby_build_status != 0 )); then
  echo "ruby-build: installation failed" >&2
  exit "${ruby_build_status}"
fi

# Build Ruby with ruby-build: first argument is the version definition,
# second is the install prefix.
ruby_release=1.9.3-p194
ruby_prefix=/usr/local/
sudo ruby-build "${ruby_release}" "${ruby_prefix}"

# Install the gems for production use; --no-rdoc and --no-ri skip generating
# rdoc and ri documentation.

# NOTE(review): no apt-get install follows this update in the visible script,
# so it looks redundant. Confirm before removing.
sudo apt-get update
for gem_name in bundler gorillib wukong-hadoop; do
  sudo gem install "${gem_name}" --no-rdoc --no-ri
done



# EMR INPUTS

# input:       {s3_bucket}/input/data_input_folder
# output:      {s3_bucket}/output/data_output_folder
# mapper:      wu-local s3://{s3_bucket}/scripts/mapper.rb --run=mapper     # wukong 3.0 script
# reducer:     wu-local s3://{s3_bucket}/scripts/reducer.rb --run=reducer   # wukong 3.0 script
# bootstrap:   s3://{s3_bucket}/scripts/wukong_setup.sh                     # This gist