# useradd -d /opt/hadoop hadoop
# passwd hadoop
# chown -R hadoop:hadoop /opt/hadoop
Grant the hadoop user sudo privileges (on each machine):
# vim /etc/sudoers
root ALL=(ALL) ALL
hadoop ALL=(ALL) ALL
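A safer way to make this change (an optional step, not part of the original instructions) is to edit the file with visudo, which syntax-checks sudoers before saving. Afterwards you can confirm the grant from the hadoop account:
$ sudo -l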
1) Switch to the hadoop account and generate an RSA key pair (run on all three machines):
# su - hadoop
$ ssh-keygen -t rsa -P ""    # accept the defaults by pressing Enter at every prompt
2) Append this machine's public key to the authorized_keys file (run on all three machines):
$ cd ~/.ssh/
$ cat id_rsa.pub >> authorized_keys
3) Copy baseline02's public key to baseline01:
$ cd ~/.ssh/
$ cp id_rsa.pub id_rsa_104.pub
$ scp id_rsa_104.pub hadoop@baseline01:/opt/hadoop/.ssh/
4) Copy baseline03's public key to baseline01:
$ cd ~/.ssh/
$ cp id_rsa.pub id_rsa_105.pub
$ scp id_rsa_105.pub hadoop@baseline01:/opt/hadoop/.ssh/
5) On baseline01, append the other two machines' public keys to authorized_keys, then remove the temporary copies:
$ cat id_rsa_104.pub >> authorized_keys
$ cat id_rsa_105.pub >> authorized_keys
$ rm id_rsa_10*.pub
6) Send baseline01's authorized_keys file to the other two machines:
$ scp authorized_keys hadoop@baseline02:/opt/hadoop/.ssh/
$ scp authorized_keys hadoop@baseline03:/opt/hadoop/.ssh/
7) On all three machines, restrict the permissions of the .ssh directory and the authorized_keys file:
$ chmod 700 ~/.ssh
$ chmod 600 ~/.ssh/authorized_keys
8) On each of the three machines, test passwordless login:
$ ssh baseline01
$ ssh baseline02
$ ssh baseline03
Passwordless login, method 2: this only makes logins from machine 1 to machines 2 and 3 passwordless; logging in from machine 2 back to machine 1 still requires a password. (Recommended.) A sketch of this approach follows.
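A minimal sketch of method 2, assuming the hadoop account and hostnames used above: on baseline01, push its public key to the other two machines with ssh-copy-id (the hadoop password is asked once per host):
$ ssh-copy-id hadoop@baseline02
$ ssh-copy-id hadoop@baseline03
After this, ssh baseline02 and ssh baseline03 from baseline01 no longer ask for a password, while logins in the other direction still do, which matches the limitation described above.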
Installing Hadoop
Version: 2.6.0
Machines: all machines
Installation steps:
# su hadoop
$ cd /opt/software
$ tar zxvf hadoop-${version}.tar.gz -C /opt/hadoop
$ cd /opt/hadoop
$ ln -s /opt/hadoop/hadoop-${version} /opt/hadoop/hadoop
$ cd /opt/hadoop/hadoop
$ mkdir -p tmpdir
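As a quick sanity check (assuming ${version} was replaced with the actual release number, e.g. 2.6.0), the symlink created above should point at the extracted directory:
$ ls -l /opt/hadoop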
Configure the hadoop-env.sh file
$ cd /opt/hadoop/hadoop/etc/hadoop/
$ mkdir -p /opt/hadoop/hadoop/pids
$ vi hadoop-env.sh
Add the following settings to hadoop-env.sh:
export JAVA_HOME=/opt/java
export HADOOP_PID_DIR=/opt/hadoop/hadoop/pids
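A quick check that JAVA_HOME points at a working JDK (this assumes Java really is installed under /opt/java, as the export above implies):
$ /opt/java/bin/java -version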
Configure the mapred-env.sh file
$ cd /opt/hadoop/hadoop/etc/hadoop/
$ vi mapred-env.sh
Add the following setting to mapred-env.sh:
export JAVA_HOME=/opt/java
Configure the core-site.xml file
$ cd /opt/hadoop/hadoop/etc/hadoop/
$ vi core-site.xml
Add the following settings to core-site.xml:
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/hadoop/tmpdir</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://baseline01:9000</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <property>
        <name>fs.trash.interval</name>
        <value>1440</value>
    </property>
</configuration>
Note: fs.defaultFS must point at the master (NameNode) machine; the hostname baseline01 is used here, so every node must be able to resolve it to the master's IP address (for example via /etc/hosts).
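A simple way to verify the name resolution on each node (assuming the hostnames were added to /etc/hosts):
$ getent hosts baseline01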
Configure the hdfs-site.xml file
$ cd /opt/hadoop/hadoop/etc/hadoop/
$ vi hdfs-site.xml
Add the following settings to hdfs-site.xml:
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/opt/hadoop/hadoop/tmpdir/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/opt/hadoop/hadoop/tmpdir/dfs/data</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>baseline01:50090</value>
    </property>
</configuration>
Configure the mapred-site.xml file
$ cd /opt/hadoop/hadoop/etc/hadoop/
$ cp mapred-site.xml.template mapred-site.xml
$ vi mapred-site.xml
Add the following settings to mapred-site.xml (in Hadoop 2.6 only the .template file ships by default, hence the copy):
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>baseline01:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>baseline01:19888</value>
    </property>
</configuration>
Configure the yarn-site.xml file
$ cd /opt/hadoop/hadoop/etc/hadoop/
$ vi yarn-site.xml
Add the following settings to yarn-site.xml:
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>baseline01:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>baseline01:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>baseline01:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>baseline01:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>baseline01:8088</value>
    </property>
</configuration>
Configure the Hadoop runtime environment variables
# vim /etc/profile
export HADOOP_HOME=/opt/hadoop/hadoop
export PATH=$HADOOP_HOME/bin:$PATH
After saving, run source /etc/profile to make the settings take effect.
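As a quick verification that the variables took effect, the hadoop command should now be found on the PATH:
$ hadoop version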
Edit the slaves file:
$ cd /opt/hadoop/hadoop/etc/hadoop
$ vi slaves
Add the following hostnames to the slaves file:
baseline01
baseline02
baseline03
Distribute the installation to the other two machines:
$ scp -r /opt/hadoop/hadoop-${version} hadoop@baseline02:/opt/hadoop
$ scp -r /opt/hadoop/hadoop-${version} hadoop@baseline03:/opt/hadoop
Then, on baseline02 and baseline03, recreate the symlink:
$ ln -s /opt/hadoop/hadoop-${version} /opt/hadoop/hadoop
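Alternatively (a convenience variant, not part of the original steps), the symlinks can be created from baseline01 over SSH instead of logging in to each node:
$ ssh hadoop@baseline02 "ln -s /opt/hadoop/hadoop-${version} /opt/hadoop/hadoop"
$ ssh hadoop@baseline03 "ln -s /opt/hadoop/hadoop-${version} /opt/hadoop/hadoop"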
Format the NameNode (on baseline01 only), then start the cluster and the job history server:
$ hdfs namenode -format
$ ${HADOOP_HOME}/sbin/start-all.sh
$ ${HADOOP_HOME}/sbin/mr-jobhistory-daemon.sh start historyserver
start-all.sh starts both the DFS and YARN modules (it simply calls start-dfs.sh and start-yarn.sh), so HDFS and YARN can also be started separately, as shown below.
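For reference, the equivalent separate startup on baseline01 would be:
$ ${HADOOP_HOME}/sbin/start-dfs.sh
$ ${HADOOP_HOME}/sbin/start-yarn.sh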
Running jps on baseline01 should show output similar to:
24429 Jps
22898 ResourceManager
23229 NodeManager
24383 JobHistoryServer
22722 SecondaryNameNode
22488 NameNode
6945 DataNode
On baseline02, jps should show:
7650 DataNode
7788 NodeManager
8018 Jps
On baseline03, jps should show:
28407 Jps
28038 DataNode
28178 NodeManager
If all three machines show the processes above, the Hadoop cluster services are working normally.
Log in to baseline01:
# su - hadoop
$ cd /opt/hadoop/hadoop
Create the HDFS directories:
$ hadoop fs -mkdir -p /tmp
$ hadoop fs -chmod 777 /tmp
$ hadoop fs -mkdir -p /user/baseline
$ hadoop fs -chown -R baseline /user/baseline
Create two local test files:
$ echo "Hello World" > file1.txt
$ echo "Hello Hadoop" > file2.txt
Upload file1.txt and file2.txt into HDFS:
$ hadoop fs -mkdir -p /user/hadoop/input
$ hadoop fs -put file1.txt file2.txt /user/hadoop/input
Run the word count example that ships with Hadoop:
$ hadoop jar /opt/hadoop/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /user/hadoop/input /user/hadoop/output
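When the job completes, you can first list the output directory to confirm it was created (the part-r-00000 file read below holds the reducer output):
$ hadoop fs -ls /user/hadoop/output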
View the word count results:
$ hadoop fs -cat /user/hadoop/output/part-r-00000
Hadoop 1
Hello 2
World 1
This shows that the MapReduce job ran correctly.
Access the Hadoop web pages by opening the following addresses in a browser.
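Based on the configuration above and the Hadoop 2.6 defaults, the addresses should be:
http://baseline01:50070 (HDFS NameNode web UI, default port)
http://baseline01:8088 (YARN ResourceManager web UI, configured above)
http://baseline01:19888 (MapReduce JobHistory web UI, configured above)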
If these pages respond normally, the three types of services above (HDFS, YARN, and the job history server) are running correctly.
Original article: http://www.cnblogs.com/wyl9527/p/6389432.html