五台机器已经配好ip地址和免密钥登录,文中不再赘述,有疑问的同学可自行百度,另外本集群采用非root用户搭建,很多命令都带有sudo,若使用root用户搭建,可忽视,如果你也使用非root用户,请特别注意,免密钥也是非root用户
hadoop1 | hadoop2 | hadoop3 | hadoop4 | hadoop5 |
---|---|---|---|---|
zookeeper | zookeeper | zookeeper | zookeeper | zookeeper |
Namenode | Namenode | |||
ZKFailoverController | ZKFailoverController | |||
JournalNode | JournalNode | JournalNode | JournalNode | JournalNode |
DataNode | DataNode | DataNode | ||
NodeManager | NodeManager | NodeManager | ||
ResourceManager | ResourceManager |
sudo vim /etc/hosts
151.25.88.141 hadoop1
151.25.88.142 hadoop2
151.25.88.143 hadoop3
151.25.88.144 hadoop4
151.25.88.145 hadoop5
sudo systemctl stop firewalld
sudo systemctl disable firewalld
sudo vim /etc/selinux/config
SELINUX=enforcing --> SELINUX=disabled
sudo yum -y install ntp ntpdate
sudo ntpdate ntp.aliyun.com
我的所有组件都安装在/opt/soft/ 下,可根据个人习惯调整
sudo mkdir -p /opt/soft
sudo tar -zvxf /home/hadoop/download/jdk-8u251-linux-x64.tar.gz -C /opt/soft
sudo tar -zvxf /home/hadoop/download/apache-zookeeper-3.6.1-bin.tar.gz -C /opt/soft
sudo tar -zvxf /home/hadoop/download/hadoop-3.2.1.tar.gz -C /opt/soft
sudo chown hadoop:hadoop /opt/soft -R
vim ~/.bashrc
export JAVA_HOME=/opt/soft/jdk1.8.0_251
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export ZOOKEEPER_HOME=/opt/soft/apache-zookeeper-3.6.1-bin
export PATH=$ZOOKEEPER_HOME/bin:$PATH
export HADOOP_HOME=/opt/soft/hadoop-3.2.1
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
(注:本文使用 hadoop-3.2.1,若使用其他版本,请将上述路径替换为对应的版本目录)
保存退出后记得source
source ~/.bashrc
cd /opt/soft/apache-zookeeper-3.6.1-bin/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
更改数据目录和配置日志目录
dataDir=/home/zookeeper/data
dataLogDir=/home/zookeeper/datalog
配置服务器
server.1=hadoop1:2888:3888
server.2=hadoop2:2888:3888
server.3=hadoop3:2888:3888
server.4=hadoop4:2888:3888
server.5=hadoop5:2888:3888
保存退出后,创建数据目录和日志目录,和zoo.cfg对应
sudo mkdir -p /home/zookeeper/data
sudo mkdir -p /home/zookeeper/datalog
sudo chown hadoop:hadoop /home/zookeeper -R
配置zk ID(其他四个节点依次为2,3,4,5)
echo 1 > /home/zookeeper/data/myid
进到hadoop目录,主要更改6个配置文件
cd /opt/soft/hadoop-3.2.1/etc/hadoop
vim hadoop-env.sh
#指定JAVA_HOME
export JAVA_HOME=/opt/soft/jdk1.8.0_251
#指定hadoop用户,hadoop3.x之后必须配置(我的用户名就叫hadoop)
export HDFS_NAMENODE_USER=hadoop
export HDFS_DATANODE_USER=hadoop
export HDFS_ZKFC_USER=hadoop
export HDFS_JOURNALNODE_USER=hadoop
export YARN_RESOURCEMANAGER_USER=hadoop
export YARN_NODEMANAGER_USER=hadoop
<!--集群名称-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value> </property>
<!--临时目录-->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop_data</value>
</property>
<!--webUI展示时的用户-->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
<!--高可用依赖的zookeeper的通讯地址-->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop1:2181,hadoop2:2181,hadoop3:2181,hadoop4:2181,hadoop5:2181</value>
</property>
hdfs-site.xml
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!--定义hdfs集群中的namenode的ID号-->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<!--定义namenode的主机名和rpc协议的端口-->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop2:8020</value>
</property>
<!--定义namenode的主机名和http协议的端口-->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop1:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop2:9870</value>
</property>
<!--定义共享edits的url-->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop1:8485;hadoop2:8485;hadoop3:8485;hadoop4:8485;hadoop5:8485/mycluster</value>
</property>
<!--定义hdfs的客户端连接hdfs集群时返回active namenode地址-->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!--hdfs集群中两个namenode切换状态时的隔离方法-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!--hdfs集群中两个namenode切换状态时的隔离方法的密钥-->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!--journalnode集群中用于保存edits文件的目录-->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/journalnode/data</value>
</property>
<!--ha的hdfs集群自动切换namenode的开关-->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.safemode.threshold.pct</name>
<value>1</value>
</property>
hadoop3
hadoop4
hadoop5
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
<description>Enable RM high-availability</description>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster1</value>
<description>Name of the cluster</description>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
<description>The list of RM nodes in the cluster when HA is enabled</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop1</value>
<description>The hostname of the rm1</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop2</value>
<description>The hostname of the rm2</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop1:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop2:8088</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop1:2181,hadoop2:2181,hadoop3:2181,hadoop4:2181,hadoop5:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
zkServer.sh start
启动hadoop
hdfs --daemon start journalnode
hdfs namenode -format
hdfs --daemon start namenode
hdfs namenode -bootstrapStandby
hdfs --daemon start namenode
hdfs zkfc -formatZK
stop-dfs.sh
start-all.sh
原文链接:https://segmentfault.com/a/1190000023834334?utm_source=sf-similar-article
原文:https://www.cnblogs.com/gentlescholar/p/14720323.html