hosea1008
4/30/2018 - 11:23 AM

Hadoop集群搭建

Hadoop集群搭建

旧教程

https://www.cnblogs.com/zishengY/p/6819160.html?utm_source=itdadao&utm_medium=referral

### 启动

sudo /data/hadoop/sbin/start-dfs.sh

Resourcemanager要单独启动

sudo /data/hadoop/sbin/yarn-daemon.sh start resourcemanager

新的靠谱的教程,针对2.7.6

https://www.kancloud.cn/linshichun/haloop-cluster-install/606549

下载hadoop-2.7.6

cd /tmp
wget http://apache.fayea.com/hadoop/common/hadoop-2.7.6/hadoop-2.7.6.tar.gz

解压

tar -zxvf hadoop-2.7.6.tar.gz

复制

# 复制到目标目录 /usr/local/hadoop
cp -R /tmp/hadoop-2.7.6 /usr/local/hadoop

配置hadoop的环境变量,在/etc/profile下追加

export HADOOP_HOME=/usr/local/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin

修改 etc/hadoop/hadoop-env.sh 文件

# The java implementation to use.
#export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/usr/local/java/jdk1.8.0_171

其它配置 slaves配置

cat etc/hadoop/slaves

bd01
bd02
bd03

配置公共属性

cat etc/hadoop/core-site.xml
<!-- Put site-specific property overrides in this file. -->

<configuration>
    <!-- 指定hadoop运行时产生文件的存储路径 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
        <description>Abase for other temporary directories.</description>
    </property>
    <!-- 指定HDFS老大(namenode)的通信地址 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hdpmaster:9000</value>
    </property>
</configuration>

配置HDFS

cat etc/hadoop/hdfs-site.xml
<!-- Put site-specific property overrides in this file. -->

<configuration>
    <!-- 设置namenode的http通讯地址 -->
    <property>
            <name>dfs.namenode.http-address</name>
            <value>hdpmaster:50070</value>
    </property>
    <!-- 设置secondarynamenode的http通讯地址 -->
        <property>
                <name>dfs.namenode.secondary.http-address</name>
                <value>hdpslave1:50090</value>
        </property>
    <!-- 设置namenode存放的路径 -->
    <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:/usr/local/hadoop/name</value>
        </property>
    <!-- 设置hdfs副本数量 -->
        <property>
                <name>dfs.replication</name>
                <value>2</value>
        </property>
    <!-- 设置datanode存放的路径 -->
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>file:/usr/local/hadoop/tmp/dfs/data</value>
        </property>
</configuration>

配置MapReduce(mapred-site.xml)

cat etc/hadoop/mapred-site.xml
<!-- Put site-specific property overrides in this file. -->

<configuration>
    <!-- 通知框架MR使用YARN -->
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.address</name>
                <value>hdpmaster:10020</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>hdpmaster:19888</value>
        </property>
</configuration>

配置YARN(yarn-site.xml)

cat etc/hadoop/yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
    <!-- 设置 resourcemanager 在哪个节点-->
    <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>hdpmaster</value>
        </property>
    <!-- reducer取数据的方式是mapreduce_shuffle -->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
    <property>
            <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
            <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
</configuration>

复制所有配置到另外节点

scp -r etc/hadoop hdpslave1:/usr/local/hadoop/etc/
scp -r etc/hadoop hdpslave2:/usr/local/hadoop/etc/