| IP | Hostname | Software | Processes |
|----|----------|----------|-----------|
| 192.168.128.11 | h1 | JDK, Hadoop, HBase | NameNode, DFSZKFailoverController, HMaster |
| 192.168.128.12 | h2 | JDK, Hadoop, HBase | NameNode, DFSZKFailoverController, HMaster |
| 192.168.128.13 | h3 | JDK, Hadoop | ResourceManager |
| 192.168.128.14 | h4 | JDK, Hadoop | ResourceManager |
| 192.168.128.15 | h5 | JDK, Hadoop, ZooKeeper, HBase | DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer |
| 192.168.128.16 | h6 | JDK, Hadoop, ZooKeeper, HBase | DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer |
| 192.168.128.17 | h7 | JDK, Hadoop, ZooKeeper, HBase | DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer |
I won't go through the preparation work step by step here. In summary, it covers: setting the hostname and IP of each node, adding the hostname-to-IP mappings, disabling the firewall, setting up passwordless SSH between the nodes, and installing the JDK with its environment variables.
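As a rough sketch of that preparation (the exact commands depend on your OS; the firewall commands below assume CentOS 6, and the paths match the ones used later in this post):

# /etc/hosts on every node
192.168.128.11 h1
192.168.128.12 h2
192.168.128.13 h3
192.168.128.14 h4
192.168.128.15 h5
192.168.128.16 h6
192.168.128.17 h7

# stop the firewall (CentOS 6 style)
service iptables stop
chkconfig iptables off

# passwordless SSH from the node(s) that run the start scripts
ssh-keygen -t rsa
ssh-copy-id h1    # repeat for h2 ... h7

# JDK environment variables, e.g. in /etc/profile
export JAVA_HOME=/home/jdk
export PATH=$JAVA_HOME/bin:$PATH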
In /home/zookeeper-3.4.8/conf, copy zoo_sample.cfg to zoo.cfg and edit it:
cp zoo_sample.cfg zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/zookeeper-3.4.8/data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=h5:2888:3888
server.2=h6:2888:3888
server.3=h7:2888:3888
Create the data directory, and inside it a file named myid containing the number 1 (the value must match the N of the corresponding server.N line above):

cd /home/zookeeper-3.4.8
mkdir data
touch data/myid
echo 1 > data/myid
Copy the entire zookeeper directory to the other two nodes:

scp -r /home/zookeeper-3.4.8 h6:/home/
scp -r /home/zookeeper-3.4.8 h7:/home/

On the other two nodes, change the contents of myid to 2 and 3 respectively.
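For example, assuming the same path on both nodes:

# on h6
echo 2 > /home/zookeeper-3.4.8/data/myid
# on h7
echo 3 > /home/zookeeper-3.4.8/data/myid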
All of the following Hadoop configuration files are under /home/hadoop-2.7.2/etc/hadoop.
hadoop-env.sh:
export JAVA_HOME=/home/jdk
core-site.xml:
<configuration>
    <!-- Set the HDFS nameservice to "masters" -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://masters</value>
    </property>
    <!-- Hadoop temporary directory -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/hadoop-2.7.2/tmp</value>
    </property>
    <!-- ZooKeeper quorum address -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>h5:2181,h6:2181,h7:2181</value>
    </property>
</configuration>
hdfs-site.xml:
<configuration>
    <!-- HDFS nameservice "masters"; must match the value in core-site.xml -->
    <property>
        <name>dfs.nameservices</name>
        <value>masters</value>
    </property>
    <!-- The nameservice "masters" has two NameNodes: h1 and h2 -->
    <property>
        <name>dfs.ha.namenodes.masters</name>
        <value>h1,h2</value>
    </property>
    <!-- RPC address of the NameNode on h1 -->
    <property>
        <name>dfs.namenode.rpc-address.masters.h1</name>
        <value>h1:9000</value>
    </property>
    <!-- HTTP address of the NameNode on h1 -->
    <property>
        <name>dfs.namenode.http-address.masters.h1</name>
        <value>h1:50070</value>
    </property>
    <!-- RPC address of the NameNode on h2 -->
    <property>
        <name>dfs.namenode.rpc-address.masters.h2</name>
        <value>h2:9000</value>
    </property>
    <!-- HTTP address of the NameNode on h2 -->
    <property>
        <name>dfs.namenode.http-address.masters.h2</name>
        <value>h2:50070</value>
    </property>
    <!-- Where the NameNode metadata (edit log) is stored on the JournalNodes -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://h5:8485;h6:8485;h7:8485/masters</value>
    </property>
    <!-- Where the JournalNodes store their data on local disk -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/home/hadoop-2.7.2/journal</value>
    </property>
    <!-- Enable automatic NameNode failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- Failover proxy provider used by clients -->
    <property>
        <name>dfs.client.failover.proxy.provider.masters</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- Fencing methods; multiple methods are separated by newlines, one per line -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
    <!-- sshfence requires passwordless SSH -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <!-- sshfence connection timeout -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
</configuration>
mapred-site.xml:
<configuration>
    <!-- Run MapReduce on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
yarn-site.xml:
<configuration>
    <!-- Enable ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <!-- Cluster id for the RM pair -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>RM_HA_ID</value>
    </property>
    <!-- Logical ids of the two ResourceManagers -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <!-- Hosts of the two ResourceManagers -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>h3</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>h4</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <!-- ZooKeeper ensemble address -->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>h5:2181,h6:2181,h7:2181</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
slaves (the DataNode / NodeManager hosts, one per line):
h5
h6
h7
Then copy the Hadoop directory to the other nodes:

scp -r hadoop-2.7.2 h2:/home/
(repeat for the remaining nodes)
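A minimal loop covering the remaining nodes (per the cluster table above, Hadoop is needed on h2 through h7):

for host in h2 h3 h4 h5 h6 h7; do
    scp -r /home/hadoop-2.7.2 $host:/home/
done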
Note: YARN HA runs on h3 and h4.
### Note: follow the steps below in strict order
1. Start the ZooKeeper cluster:
[root@h6 ~]# cd /home/zookeeper-3.4.8/bin/
[root@h6 bin]# ./zkServer.sh start
Do the same on h5, h6 and h7, then check the status on each node:

[root@h6 bin]# ./zkServer.sh status
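A convenience sketch to check all three nodes at once, assuming passwordless SSH to them:

for host in h5 h6 h7; do
    echo "== $host =="
    ssh $host /home/zookeeper-3.4.8/bin/zkServer.sh status
done

One node should report "Mode: leader" and the other two "Mode: follower".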
2. Start the JournalNodes (hadoop-daemons.sh, with the "s", starts the daemon on every host listed in slaves):
[root@h5 bin]# cd /home/hadoop-2.7.2/sbin/
[root@h5 sbin]# ./hadoop-daemons.sh start journalnode
h5: starting journalnode, logging to /home/hadoop-2.7.2/logs/hadoop-root-journalnode-h5.out
h7: starting journalnode, logging to /home/hadoop-2.7.2/logs/hadoop-root-journalnode-h7.out
h6: starting journalnode, logging to /home/hadoop-2.7.2/logs/hadoop-root-journalnode-h6.out
[root@h5 sbin]# jps
2420 JournalNode
2309 QuorumPeerMain
2461 Jps
[root@h5 sbin]# ^C
3. Format HDFS

On h1, run:
hdfs namenode -format
Formatting creates the metadata files under the directory set by hadoop.tmp.dir in core-site.xml (/home/hadoop-2.7.2/tmp here).
Copy that tmp directory to h2 so the standby NameNode starts from the same metadata:
[root@h1 hadoop-2.7.2]# scp -r tmp/ h2:/home/hadoop-2.7.2/
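As an aside, instead of copying tmp by hand, Hadoop 2.x also provides a bootstrap command that can be run on h2 once the NameNode on h1 is up; either approach gives the standby the same metadata:

hdfs namenode -bootstrapStandby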
4. Format the HA state in ZooKeeper (running it on h1 is enough):
[root@h1 hadoop-2.7.2]# hdfs zkfc -formatZK
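An optional check that the formatZK step created the HA znode, by peeking into ZooKeeper with zkCli.sh:

/home/zookeeper-3.4.8/bin/zkCli.sh -server h5:2181
ls /hadoop-ha
# should list the nameservice, e.g. [masters]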
5. Start HDFS (run on h1):
[root@h1 hadoop-2.7.2]# sbin/start-dfs.sh
16/02/25 05:01:14 WARN hdfs.DFSUtil: Namenode for ns1 remains unresolved for ID null. Check your hdfs-site.xml file to ensure namenodes are configured properly.
16/02/25 05:01:14 WARN hdfs.DFSUtil: Namenode for ns2 remains unresolved for ID null. Check your hdfs-site.xml file to ensure namenodes are configured properly.
16/02/25 05:01:14 WARN hdfs.DFSUtil: Namenode for ns3 remains unresolved for ID null. Check your hdfs-site.xml file to ensure namenodes are configured properly.
Starting namenodes on [h1 h2 masters masters masters]
masters: ssh: Could not resolve hostname masters: Name or service not known
masters: ssh: Could not resolve hostname masters: Name or service not known
masters: ssh: Could not resolve hostname masters: Name or service not known
h2: starting namenode, logging to /home/hadoop-2.7.2/logs/hadoop-root-namenode-h2.out
h1: starting namenode, logging to /home/hadoop-2.7.2/logs/hadoop-root-namenode-h1.out
h5: starting datanode, logging to /home/hadoop-2.7.2/logs/hadoop-root-datanode-h5.out
h7: starting datanode, logging to /home/hadoop-2.7.2/logs/hadoop-root-datanode-h7.out
h6: starting datanode, logging to /home/hadoop-2.7.2/logs/hadoop-root-datanode-h6.out
Starting journal nodes [h5 h6 h7]
h5: journalnode running as process 2420. Stop it first.
h6: journalnode running as process 2885. Stop it first.
h7: journalnode running as process 2896. Stop it first.
Starting ZK Failover Controllers on NN hosts [h1 h2 masters masters masters]
masters: ssh: Could not resolve hostname masters: Name or service not known
masters: ssh: Could not resolve hostname masters: Name or service not known
masters: ssh: Could not resolve hostname masters: Name or service not known
h2: starting zkfc, logging to /home/hadoop-2.7.2/logs/hadoop-root-zkfc-h2.out
h1: starting zkfc, logging to /home/hadoop-2.7.2/logs/hadoop-root-zkfc-h1.out
[root@h1 hadoop-2.7.2]#
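A quick way to confirm that the expected daemons are up on each node (compare with the process column of the cluster table; a convenience sketch assuming passwordless SSH):

for host in h1 h2 h5 h6 h7; do
    echo "== $host =="
    ssh $host jps
done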
6. Start YARN by running start-yarn.sh on h3. The NameNode and ResourceManager are placed on separate machines for performance reasons: both consume a lot of resources, so they are split across hosts, which also means they have to be started separately.
[root@h3 sbin]# ./start-yarn.sh
On h4, start the second ResourceManager manually:
[root@h4 sbin]# ./yarn-daemons.sh start resourcemanager
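To confirm which ResourceManager is currently active (rm1 and rm2 are the ids defined in yarn-site.xml above):

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2

One should print "active" and the other "standby".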
Verification:

Open http://192.168.128.11:50070; the overview page shows 'h1:9000' (active).

Open http://192.168.128.12:50070; the overview page shows 'h2:9000' (standby).
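The same check can be done from the command line, using the NameNode ids defined in hdfs-site.xml:

hdfs haadmin -getServiceState h1
hdfs haadmin -getServiceState h2

These should print "active" and "standby" respectively at this point.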
Upload a file to HDFS:
[root@h4 bin]# hadoop fs -put /etc/profile /profile
[root@h4 bin]# hadoop fs -ls
ls: `.': No such file or directory
[root@h4 bin]# hadoop fs -ls /
Found 1 items
-rw-r--r--   3 root supergroup       1814 2016-02-26 19:08 /profile
[root@h4 bin]#
Kill the NameNode on h1 to test failover:
[root@h1 sbin]# jps
2480 NameNode
2868 Jps
2775 DFSZKFailoverController
[root@h1 sbin]# kill -9 2480
[root@h1 sbin]# jps
2880 Jps
2775 DFSZKFailoverController
[root@h1 sbin]# hadoop fs -ls /
Found 1 items
-rw-r--r--   3 root supergroup       1814 2016-02-26 19:08 /profile
At this point h2 becomes active.
Manually restart the NameNode on h1:
[root@h1 sbin]# ./hadoop-daemon.sh start namenode
starting namenode, logging to /home/hadoop-2.7.2/logs/hadoop-root-namenode-h1.out
Observe that h1 now shows as standby.
Verify YARN by running the wordcount example:
[root@h1 sbin]# hadoop jar /home/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /profile /out
16/02/26 19:14:23 INFO input.FileInputFormat: Total input paths to process : 1
16/02/26 19:14:23 INFO mapreduce.JobSubmitter: number of splits:1
16/02/26 19:14:23 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1456484773347_0001
16/02/26 19:14:24 INFO impl.YarnClientImpl: Submitted application application_1456484773347_0001
16/02/26 19:14:24 INFO mapreduce.Job: The url to track the job: http://h3:8088/proxy/application_1456484773347_0001/
16/02/26 19:14:24 INFO mapreduce.Job: Running job: job_1456484773347_0001
16/02/26 19:14:49 INFO mapreduce.Job: Job job_1456484773347_0001 running in uber mode : false
16/02/26 19:14:49 INFO mapreduce.Job:  map 0% reduce 0%
16/02/26 19:15:05 INFO mapreduce.Job:  map 100% reduce 0%
16/02/26 19:15:22 INFO mapreduce.Job:  map 100% reduce 100%
16/02/26 19:15:23 INFO mapreduce.Job: Job job_1456484773347_0001 completed successfully
16/02/26 19:15:23 INFO mapreduce.Job: Counters: 49
        File System Counters
                FILE: Number of bytes read=2099
                FILE: Number of bytes written=243781
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=1901
                HDFS: Number of bytes written=1470
                HDFS: Number of read operations=6
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=2
        Job Counters
                Launched map tasks=1
                Launched reduce tasks=1
                Data-local map tasks=1
                Total time spent by all maps in occupied slots (ms)=13014
                Total time spent by all reduces in occupied slots (ms)=13470
                Total time spent by all map tasks (ms)=13014
                Total time spent by all reduce tasks (ms)=13470
                Total vcore-milliseconds taken by all map tasks=13014
                Total vcore-milliseconds taken by all reduce tasks=13470
                Total megabyte-milliseconds taken by all map tasks=13326336
                Total megabyte-milliseconds taken by all reduce tasks=13793280
        Map-Reduce Framework
                Map input records=80
                Map output records=256
                Map output bytes=2588
                Map output materialized bytes=2099
                Input split bytes=87
                Combine input records=256
                Combine output records=156
                Reduce input groups=156
                Reduce shuffle bytes=2099
                Reduce input records=156
                Reduce output records=156
                Spilled Records=312
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=395
                CPU time spent (ms)=4100
                Physical memory (bytes) snapshot=298807296
                Virtual memory (bytes) snapshot=4201771008
                Total committed heap usage (bytes)=138964992
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=1814
        File Output Format Counters
                Bytes Written=1470
[root@h1 sbin]# hadoop fs -ls /
Found 3 items
drwxr-xr-x   - root supergroup          0 2016-02-26 19:15 /out
-rw-r--r--   3 root supergroup       1814 2016-02-26 19:08 /profile
drwx------   - root supergroup          0 2016-02-26 19:14 /tmp
[root@h1 sbin]#
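To look at the word-count result itself (the reducer writes its output as part-r-00000 under /out):

hadoop fs -cat /out/part-r-00000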
The Hadoop HA cluster setup is now complete.
Next, configure HBase (the configuration files live under /home/hbase-1.2.0/conf).

hbase-env.sh:

export JAVA_HOME=/home/jdk
export HBASE_MANAGES_ZK=false

HBASE_MANAGES_ZK=false tells HBase to use the external ZooKeeper ensemble on h5, h6 and h7 instead of managing its own.
hbase-site.xml:
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://h1:9000/hbase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.master</name>
        <value>h1:60000</value>
    </property>
    <property>
        <name>hbase.master.port</name>
        <value>60000</value>
        <description>The port the master should bind to.</description>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>h5,h6,h7</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
</configuration>
Note: the host and port of hbase.rootdir in $HBASE_HOME/conf/hbase-site.xml must match the host and port of fs.defaultFS (fs.default.name) in $HADOOP_HOME/etc/hadoop/core-site.xml.
regionservers, with contents:
h5
h6
h7
Copy the HBase directory to h2, h5, h6 and h7.
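For example (HBase is installed at /home/hbase-1.2.0, as the shell logs below show):

for host in h2 h5 h6 h7; do
    scp -r /home/hbase-1.2.0 $host:/home/
done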
First bring everything up in the same order used for the Hadoop HA cluster above.
Then start HBase on h1 and h2:
./start-hbase.sh
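A quick sanity check after start-up (per the cluster table, HMaster should be running on h1/h2 and HRegionServer on h5/h6/h7):

# on h1 and h2
jps | grep HMaster
# on h5, h6, h7
jps | grep HRegionServer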
Test by entering the HBase shell:
[root@h1 bin]# hbase shell
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hbase-1.2.0/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop-2.7.2/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 1.2.0, r25b281972df2f5b15c426c8963cbf77dd853a5ad, Thu Feb 18 23:01:49 CST 2016
hbase(main):001:0> esit
NameError: undefined local variable or method `esit' for #<Object:0x7ad1caa2>
hbase(main):002:0> exit
That completes the whole setup.
Latest Hadoop-2.7.2 + HBase-1.2.0 + ZooKeeper-3.4.8 HA cluster configuration and installation
Original article: http://weir2009.iteye.com/blog/2279118