gaiar
6/6/2018 - 12:03 PM

Complete system installation for Data Science / Neural Networks from scratch / bare metal (assuming you have assembled the PC)

Complete system installation for Data Science / Neural Networks from scratch / bare metal (assuming you have assembled the PC)

# BASIC SYSTEM SETUP

# First download Ubuntu iso file from https://www.ubuntu.com/download/desktop
# Use 16.04 LTS (17 is also ok, but it's better to use LTS versions, also 18 will be very mature in terms of systemd)

# Download Linux Live USB Creator and install the iso to your USB stick https://www.linuxliveusb.com

# Boot your system, go to BIOS on boot (usually Del) or boot menu (usually F12) and choose your USB stick as boot medium
# Install Linux (these steps can be omitted if clean Ubuntu installation is provided as service by admins / cloud provider / etc)
# Minor trick: unplug ALL of your hard disks (unless you are an advanced user) except for the disk for your system
# When asked about partitions, do the following:
# Allocate ca. 80 GB to ext4 primary partition mounted on / for system
# Allocate ca. your RAM size for swap
# Allocate ca. 35-50MB for UEFI partition
# Also it's useful for you to Google more about LVM and mdadm at this stage for education

# update the package lists, upgrade installed packages, and install an ssh server
# so the box can be administered remotely from here on
sudo apt-get update
sudo apt-get upgrade
sudo apt-get install openssh-server

# Now you can ssh into your server. Let's make it a bit more secure
# You can go as far as using ufw  - but this is overkill - https://www.digitalocean.com/community/tutorials/how-to-set-up-a-firewall-with-ufw-on-ubuntu-16-04
# Now you should configure login into your server using only an SSH key - https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server

# Read the guidelines, and generate your private-public key pair
# It's easier just to use ssh-keygen on your system and then copy the private key to your client
# Note that if you are using a Windows client like PuTTY you will have to convert the key format yourself
# With PuTTY for example you have to use puttygen to convert the linux key to putty format by loading it and then saving it
# Do not forget to add your public key to authorized_keys, then restart the ssh daemon:
sudo service ssh restart

# Then it's worth disabling password authentication (key-only logins)
sudo nano /etc/ssh/sshd_config
# Then change #PasswordAuthentication yes to PasswordAuthentication no
# (run `sudo service ssh restart` again afterwards for the change to take effect)

# Run sudo commands wo password (for advanced users)

# add user to sudoers wo password — visudo validates syntax before saving
sudo visudo
# Add this line to the file (it is sudoers syntax, NOT a shell command,
# so it is commented out here). Replace YOUR_USERNAME with the proper one:
# YOUR_USERNAME ALL=(ALL) NOPASSWD: ALL
# Also ALWAYS use only visudo to edit this conf (!!!) otherwise the file may get locked and cause problems




# NVIDIA DRIVERS SETUP

# If you break everything - refer here - https://askubuntu.com/questions/760934/graphics-issues-after-while-installing-ubuntu-16-04-16-10-with-nvidia-graphics
# Find out your nvidia card model
# Check compatible drivers here - http://www.nvidia.com/Download/index.aspx
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt-get update
# change the package version below to your latest compatible driver version
sudo apt-get install nvidia-384
# sometimes the latest driver will not work, sometimes you have to downgrade to make everything work...
# DO NOT FORGET TO REBOOT!
# Check your installation with command
nvidia-smi





# SETTING UP YOUR DISK PARTITIONS
# First read this https://help.ubuntu.com/community/Fstab
# Then find out your devices' sizes and names (assuming console-only access; via GUI it's easiest to see via gparted)
sudo lsblk -o NAME,FSTYPE,SIZE,MOUNTPOINT,LABEL
# Now see your devices' UUIDs
sudo blkid
# Now you can add your disk to fstab easily
# I usually follow a convention like this (easier to do system back-ups)
cd /
# mkdir -p creates parents, so /media itself is created implicitly
sudo mkdir -p media/dump media/ssd

# CAREFUL - after editing fstab INCORRECTLY your system MAY NOT BOOT
# I usually add standard lines like this (fstab syntax, NOT a shell command):
# UUID=0bef4ed6-c6bd-4d63-b4a4-b78af2e606cb /media/disk2 ext2 defaults 0 2

# also always check that everything mounts correctly
sudo mount -a




# INSTALL BASIC ESSENTIAL ADMIN SOFTWARE / UTILS

# tmux is a client-server console app enabling you to have pane-based detachable consoles;
# glances is the best hardware monitoring tool ever
sudo apt-get install tmux
sudo apt-get install glances


# INSTALL DOCKER
# refer here for more information https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#install-using-the-repository and explanations

# remove any older docker packages first
sudo apt-get remove docker docker-engine docker.io
sudo apt-get update

# extra kernel modules (aufs storage-driver support for the running kernel)
sudo apt-get install \
	linux-image-extra-"$(uname -r)" \
	linux-image-extra-virtual

sudo apt-get update

# prerequisites for apt over HTTPS — note curl is already in this list,
# no separate `apt-get install curl` is needed afterwards
sudo apt-get install \
    apt-transport-https \
    ca-certificates \
    curl \
    software-properties-common

# add Docker's official GPG key and verify its fingerprint
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo apt-key fingerprint 0EBFCD88

# add the stable repo for this Ubuntu release and install docker-ce
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
sudo apt-get update
sudo apt-get install docker-ce

# smoke-test the installation
sudo docker run hello-world

# Do not forget to add your user to docker group to use docker wo sudo
sudo groupadd docker
sudo usermod -aG docker "$USER"

# LOG IN LOG OUT AFTER THIS STEP




# DEEP LEARNING NVIDIA DOCKER INSTALLATION

# Also we need to install nvidia docker toolkit
# https://github.com/NVIDIA/nvidia-docker
# Install nvidia-docker and nvidia-docker-plugin
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker_1.0.1-1_amd64.deb
sudo dpkg -i /tmp/nvidia-docker*.deb && rm /tmp/nvidia-docker*.deb

# Test nvidia-smi from inside a CUDA container
sudo apt install nvidia-modprobe
nvidia-docker run --rm nvidia/cuda nvidia-smi

# if you are running as non-root add your user to the docker group
# ($USER expands to the current user — same step as in the docker section above)
sudo usermod -aG docker "$USER"



# START YOUR DOCKER CONTAINER
mkdir docker
cd docker
nano Dockerfile
# Copy - paste the below dockerfile
docker build -t ds_nn .
# ds_nn is the name of the image we are building
# if you want to add to the docker file - please add at the bottom

# Use this command to run your container (image name must match the -t tag above)
# note: --shm-size is for pytorch data loaders to work properly
nvidia-docker run -it -v /some/folder:/home/keras/notebook -p 8888:8888 -p 8097:8097 --shm-size 8G ds_nn

# Run this in a console inside docker to start visdom
python -m visdom.server





# DOCKER FILE START
# CUDA 8 + cuDNN 6 base image with sshd, Miniconda, TF/Keras/PyTorch and jupyter
FROM nvidia/cuda:8.0-cudnn6-devel

RUN apt-get update && apt-get install -y openssh-server

# -y is required: `docker build` is non-interactive, so an apt-get
# confirmation prompt would abort the build
RUN apt-get install -y unrar-free && \
    apt-get install -y p7zip-full

RUN mkdir /var/run/sshd
# WARNING(security): a hardcoded root password is baked into the image - OK for a
# private box, but change it (or keep root login disabled) before exposing port 22
RUN echo 'root:Ubuntu@41' | chpasswd
RUN sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config

# SSH login fix. Otherwise user is kicked off after login
RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd

ENV NOTVISIBLE "in users profile"
RUN echo "export VISIBLE=now" >> /etc/profile

ENV CONDA_DIR /opt/conda
ENV PATH $CONDA_DIR/bin:$PATH

# writing env variables to /etc/profile as mentioned here https://docs.docker.com/engine/examples/running_ssh_service/#run-a-test_sshd-container
# so that SSH login shells (which do not inherit Dockerfile ENV) see them too
RUN echo "export CONDA_DIR=/opt/conda" >> /etc/profile
RUN echo "export PATH=$CONDA_DIR/bin:$PATH" >> /etc/profile

# install build tooling, fetch Miniconda (sha256-verified) and hard-link cuDNN
# into the CUDA tree where build systems expect to find it
RUN mkdir -p $CONDA_DIR && \
    echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \
    apt-get update && \
    apt-get install -y wget git libhdf5-dev g++ graphviz openmpi-bin nano && \
    wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \
    echo "c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a *Miniconda3-4.2.12-Linux-x86_64.sh" | sha256sum -c - && \
    /bin/bash /Miniconda3-4.2.12-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
    ln /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/libcudnn.so && \
    ln /usr/lib/x86_64-linux-gnu/libcudnn.so.6 /usr/local/cuda/lib64/libcudnn.so.6 && \
    ln /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h && \
    rm Miniconda3-4.2.12-Linux-x86_64.sh

ENV NB_USER keras
ENV NB_UID 1000

RUN echo "export NB_USER=keras" >> /etc/profile
RUN echo "export NB_UID=1000" >> /etc/profile

# same CUDA-related variables for SSH login shells (see /etc/profile note above)
RUN echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH" >> /etc/profile
RUN echo "export CPATH=/usr/include:/usr/include/x86_64-linux-gnu:/usr/local/cuda/include:$CPATH" >> /etc/profile
RUN echo "export LIBRARY_PATH=/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:$LIBRARY_PATH" >> /etc/profile
RUN echo "export CUDA_HOME=/usr/local/cuda" >> /etc/profile
RUN echo "export CPLUS_INCLUDE_PATH=$CPATH" >> /etc/profile
RUN echo "export KERAS_BACKEND=tensorflow" >> /etc/profile

# create the unprivileged user that owns conda and runs jupyter
# (no trailing whitespace after '\' — a space after the backslash breaks the continuation)
RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
    mkdir -p $CONDA_DIR && \
    chown keras $CONDA_DIR -R

USER keras

RUN mkdir -p /home/keras/notebook

# Python
ARG python_version=3.5

# -y on every conda install: a non-interactive build cannot answer the
# "Proceed ([y]/n)?" prompt
RUN conda install -y python=${python_version} && \
    pip install --upgrade pip && \
    pip install tensorflow-gpu && \
    conda install -y Pillow scikit-learn notebook pandas matplotlib mkl nose pyyaml six h5py && \
    conda install -y theano pygpu bcolz && \
    pip install keras kaggle-cli lxml opencv-python requests scipy tqdm visdom && \
    conda install -y pytorch torchvision cuda80 -c soumith && \
    conda clean -yt

RUN pip install jupyter_contrib_nbextensions && \
    jupyter contrib nbextension install --user

# same CUDA-related variables for non-login processes (docker exec / CMD)
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
ENV CPATH /usr/include:/usr/include/x86_64-linux-gnu:/usr/local/cuda/include:$CPATH
ENV LIBRARY_PATH /usr/local/cuda/lib64:/lib/x86_64-linux-gnu:$LIBRARY_PATH
ENV CUDA_HOME /usr/local/cuda
ENV CPLUS_INCLUDE_PATH $CPATH
ENV KERAS_BACKEND tensorflow

WORKDIR /home/keras/notebook

# 8888 jupyter, 6006 tensorboard, 22 ssh, 8097 visdom
EXPOSE 8888 6006 22 8097

CMD jupyter notebook --port=8888 --ip=0.0.0.0 --no-browser

# DOCKERFILE END