HDFS cluster configuration
#Core configuration parameters:
1) Set Hadoop's default file system to hdfs
2) Specify which machine hosts the HDFS NameNode
3) Specify the local directory where the NameNode stores its metadata
4) Specify the local directory where the DataNode stores its file blocks
1.vim hadoop-env.sh
export JAVA_HOME=/usr/local/software/jdk1.8.0_11/
2.vim core-site.xml
#Specify the HDFS scheme and the NameNode (pengyy22)
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://pengyy22:9000</value>
</property>
</configuration>
3.vim hdfs-site.xml
#Local directory where the NameNode stores its metadata
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/name</value>
</property>
#Local directory where the DataNode stores its file blocks
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/data</value>
</property>
4.scp
#Copy the entire Hadoop directory to each DataNode server
scp -r hadoop-2.8.1/ pengyy42:/usr/local/software/
scp -r hadoop-2.8.1/ pengyy43:/usr/local/software/
scp -r hadoop-2.8.1/ pengyy44:/usr/local/software/
5.vim /etc/profile
#Configure the environment variables
#pengyy start
JAVA_HOME=/usr/local/software/jdk1.8.0_11
CLASSPATH=.:$JAVA_HOME/lib/tools.jar
HADOOP_HOME=/usr/local/software/hadoop-2.8.1
ZOOKEEPER_HOME=/usr/local/software/zookeeper
PATH=$JAVA_HOME/bin:$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$ZOOKEEPER_HOME/bin
export JAVA_HOME CLASSPATH HADOOP_HOME ZOOKEEPER_HOME PATH
#pengyy end
source /etc/profile
6. Initialize the NameNode metadata directory
#Initialize the NameNode metadata storage directory (the directory that gets created is the one specified in hdfs-site.xml)
#Creates a brand-new metadata storage directory
#Generates fsimage, the file that records the metadata
#Generates cluster identifiers, e.g. the cluster id (clusterID)
hadoop namenode -format
7. Start the processes
#Start the NameNode process
hadoop-daemon.sh start namenode
#Stop the NameNode process
hadoop-daemon.sh stop namenode
#Check the processes with jps
[root@pengyy22 logs]# jps
2984 Jps
2907 NameNode
#Check the listening ports: netstat -anp|grep 2907
[root@pengyy22 logs]# netstat -anp|grep 2907
tcp 0 0 192.168.31.22:9000 0.0.0.0:* LISTEN 2907/java
tcp 0 0 0.0.0.0:50070 0.0.0.0:* LISTEN 2907/java
[root@pengyy22 logs]# netstat -nltp|grep 2907
tcp 0 0 192.168.31.22:9000 0.0.0.0:* LISTEN 2907/java
tcp 0 0 0.0.0.0:50070 0.0.0.0:* LISTEN 2907/java
#Access via browser
http://192.168.31.22:50070 or http://pengyy22:50070
#Start the DataNode process
hadoop-daemon.sh start datanode
#Stop the DataNode process
hadoop-daemon.sh stop datanode
[root@pengyy42 hadoop]# jps
6038 Jps
1515 QuorumPeerMain
5963 DataNode
#Access via browser
http://pengyy42:50075
8. Batch start/stop scripts
1)cd /usr/local/software/hadoop-2.8.1/etc/hadoop/
2)vim slaves
pengyy22
pengyy42
pengyy43
pengyy44
3) Start / stop
start-dfs.sh
stop-dfs.sh
[root@pengyy22 hadoop-2.8.1]# start-dfs.sh
Starting namenodes on [pengyy22]
pengyy22: starting namenode, logging to /usr/local/software/hadoop-2.8.1/logs/hadoop-root-namenode-pengyy22.out
pengyy42: starting datanode, logging to /usr/local/software/hadoop-2.8.1/logs/hadoop-root-datanode-pengyy42.out
pengyy43: starting datanode, logging to /usr/local/software/hadoop-2.8.1/logs/hadoop-root-datanode-pengyy43.out
pengyy44: starting datanode, logging to /usr/local/software/hadoop-2.8.1/logs/hadoop-root-datanode-pengyy44.out
pengyy22: starting datanode, logging to /usr/local/software/hadoop-2.8.1/logs/hadoop-root-datanode-pengyy22.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /usr/local/software/hadoop-2.8.1/logs/hadoop-root-secondarynamenode-pengyy22.out
[root@pengyy22 hadoop-2.8.1]# jps
26514 SecondaryNameNode
26661 Jps
26246 NameNode
26379 DataNode
#"Starting secondary namenodes [0.0.0.0]": the secondary namenode address can be specified in hdfs-site.xml
#The following puts the NameNode and the SecondaryNameNode on separate hosts, placing the SecondaryNameNode on the pengyy42 server
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>pengyy42:50090</value>
</property>
#Copy hdfs-site.xml to the DataNode servers ($PWD expands to the current directory)
scp hdfs-site.xml pengyy42:$PWD
scp hdfs-site.xml pengyy43:$PWD
scp hdfs-site.xml pengyy44:$PWD
9. Operating HDFS from the command line
hadoop fs -ls /
hadoop fs -put /software/jdk-8u11-linux-x64.tar.gz /
hadoop fs -get /jdk-8u11-linux-x64.tar.gz
hadoop fs -mkdir /aaa
hadoop fs -mv /jdk-8u11-linux-x64.tar.gz /aaa/
hadoop fs -ls /aaa/
hadoop fs -rm -r /aaa
hadoop fs -appendToFile qinghsu.txt /install.log
hadoop fs -tail -f /install.log
hadoop fs -cat /install.log
Operating HDFS from Java
1. Add the jar dependencies
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.8.1</version>
</dependency>
/** Missing artifact jdk.tools:jdk.tools:jar:1.8
tools.jar ships with the JDK; the dependencies declared in pom.xml implicitly require tools.jar, but tools.jar is not in the Maven repository.
The fix is simply to add tools.jar from the local JDK as a system-scoped dependency: */
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
//Problem encountered when downloading files from Hadoop:
java.lang.RuntimeException: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
//Solution: set up a Windows build of Hadoop locally and set HADOOP_HOME=D:\Learn\hadoop-2.8.1
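With the dependencies above in place, the following is a minimal sketch of driving HDFS from Java (upload, download, list). The NameNode address hdfs://pengyy22:9000 matches core-site.xml above; the class name, local paths, and the "root" user are placeholders for illustration, not part of the original notes.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClientDemo {
    public static void main(String[] args) throws Exception {
        // On Windows, hadoop.home.dir can be set as a system property instead of the HADOOP_HOME variable:
        // System.setProperty("hadoop.home.dir", "D:\\Learn\\hadoop-2.8.1");
        Configuration conf = new Configuration();
        // Connect to the NameNode configured in core-site.xml, acting as user "root" (placeholder)
        FileSystem fs = FileSystem.get(new URI("hdfs://pengyy22:9000"), conf, "root");

        fs.mkdirs(new Path("/aaa"));                                          // like: hadoop fs -mkdir /aaa
        fs.copyFromLocalFile(new Path("D:/qinghsu.txt"), new Path("/aaa/"));  // like: hadoop fs -put
        fs.copyToLocalFile(new Path("/aaa/qinghsu.txt"), new Path("D:/"));    // like: hadoop fs -get

        for (FileStatus status : fs.listStatus(new Path("/"))) {              // like: hadoop fs -ls /
            System.out.println(status.getPath());
        }
        fs.close();
    }
}

On Windows it is typically the download (copyToLocalFile / -get) step that triggers the "HADOOP_HOME and hadoop.home.dir are unset" error above, because writing to the local file system goes through winutils.exe; pointing HADOOP_HOME (or hadoop.home.dir) at a local Hadoop directory that contains bin\winutils.exe resolves it.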
Source: https://www.cnblogs.com/pengyy/p/11318254.html