HDFS is designed primarily to store massive amounts of data, which means it must be able to hold a very large number of files. HDFS splits each file into blocks and stores those blocks on different DataNodes, and it provides a Java API for operating on the files it holds; where exactly the blocks land on the DataNodes is transparent to the developer.
The Java API supports all of the common HDFS operations, such as creating files, deleting files, and reading file contents. The rest of this section walks through the most frequently used HDFS Java APIs with programming examples. First, add the hadoop-client dependency to the project's pom.xml:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.1.1</version>
</dependency>
// Imports used by the examples below.
import java.io.*;
import java.net.URI;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

Configuration conf;
FileSystem fileSystem;

public HdfsAPI() {
    conf = new Configuration();
    // Client-side defaults: 2 replicas per block, 128 MB block size.
    conf.set("dfs.replication", "2");
    conf.set("dfs.blocksize", "128m");
    try {
        // Connect to the NameNode (substitute its host for ${NameNode}) as user "hadoop".
        fileSystem = FileSystem.get(new URI("hdfs://${NameNode}:9000"), conf, "hadoop");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
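Each helper method below closes the shared fileSystem handle when it finishes, so in this style a fresh HdfsAPI is constructed per call. A minimal usage sketch (the method names are the ones defined below):

public static void main(String[] args) throws Exception {
    // Each helper closes fileSystem on exit, so build a new instance per operation.
    new HdfsAPI().testLs();
    new HdfsAPI().testMkdir();
}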
public void testGet() throws IllegalArgumentException, IOException {
    // Download /output/part.txt from HDFS to the local file system.
    // Note that Java does not expand "~", so an absolute local path is safer here.
    fileSystem.copyToLocalFile(new Path("/output/part.txt"), new Path("~/Downloads"));
    fileSystem.close();
}
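Uploading works symmetrically via copyFromLocalFile. A minimal sketch; the local and HDFS paths here are made-up examples:

public void testPut() throws IllegalArgumentException, IOException {
    // Upload a local file into the HDFS directory /output.
    fileSystem.copyFromLocalFile(new Path("/tmp/part.txt"), new Path("/output"));
    fileSystem.close();
}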
public void testLs() throws IllegalArgumentException, IOException {
    // Recursively list every file under the root directory.
    RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path("/"), true);
    while (listFiles.hasNext()) {
        LocatedFileStatus status = listFiles.next();
        System.out.println("Path: " + status.getPath());
        System.out.println("Block size: " + status.getBlockSize());
        System.out.println("File length: " + status.getLen());
        System.out.println("Replication: " + status.getReplication());
        System.out.println("Block locations: " + Arrays.toString(status.getBlockLocations()) + "\n");
    }
    fileSystem.close();
}
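Note that listFiles returns files only. To see directories as well, one level at a time, FileSystem.listStatus can be used instead; a minimal sketch:

public void testListStatus() throws IllegalArgumentException, IOException {
    // listStatus is non-recursive and returns both files and directories.
    FileStatus[] statuses = fileSystem.listStatus(new Path("/"));
    for (FileStatus status : statuses) {
        System.out.println((status.isDirectory() ? "dir:  " : "file: ") + status.getPath());
    }
    fileSystem.close();
}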
public void testMkdir() throws IllegalArgumentException, IOException {
    // Create a directory, including any missing parents (like mkdir -p).
    fileSystem.mkdirs(new Path("/output/test/testmk"));
    fileSystem.close();
}
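To check whether a path already exists before creating or deleting it, FileSystem.exists is available; a short sketch:

public void testExists() throws IOException {
    // exists() works for both files and directories.
    System.out.println(fileSystem.exists(new Path("/output/test/testmk")));
    fileSystem.close();
}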
public void testDeldir() throws IllegalArgumentException, IOException {
    // Delete the directory; the second argument requests recursive deletion.
    boolean delete = fileSystem.delete(new Path("/output/test/testmk"), true);
    if (delete) {
        System.out.println("Deleted successfully");
    }
    fileSystem.close();
}
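Moving or renaming a path is done with FileSystem.rename, which returns false rather than throwing on failure. A minimal sketch with hypothetical paths:

public void testRename() throws IOException {
    // Move /output/test to /output/test2 (works for files and directories alike).
    boolean renamed = fileSystem.rename(new Path("/output/test"), new Path("/output/test2"));
    System.out.println("Renamed: " + renamed);
    fileSystem.close();
}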
public void testReadData() throws IOException {
    // Open the HDFS file as an input stream.
    FSDataInputStream in = fileSystem.open(new Path("/test.txt"));
    // Wrap the stream in a buffered reader, decoding the bytes as UTF-8.
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"));
    String line = null;
    while ((line = br.readLine()) != null) {
        System.out.println(line);
    }
    // Close the outer reader first; this also closes the underlying stream.
    br.close();
    in.close();
    fileSystem.close();
}
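For a plain byte-for-byte copy, Hadoop's IOUtils helper avoids the manual read loop. A minimal sketch that dumps the same file to the console:

// Requires: import org.apache.hadoop.io.IOUtils;
public void testCopyToStdout() throws IOException {
    FSDataInputStream in = fileSystem.open(new Path("/test.txt"));
    // Copy the stream to stdout with a 4 KB buffer; 'false' leaves stdout open.
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.close();
    fileSystem.close();
}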
public void testRandomReadData() throws IOException {
    FSDataInputStream in = fileSystem.open(new Path("/test.txt"));
    in.seek(12);                  // Seek to byte offset 12 before reading.
    byte[] buf = new byte[16];    // Read up to the next 16 bytes.
    int n = in.read(buf);         // read() may return fewer bytes than requested.
    System.out.println(new String(buf, 0, n));
    in.close();
    fileSystem.close();
}
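When exactly that many bytes are required, FSDataInputStream also offers a positioned readFully, which throws instead of returning a short read. A minimal sketch:

public void testReadFully() throws IOException {
    FSDataInputStream in = fileSystem.open(new Path("/test.txt"));
    byte[] buf = new byte[16];
    // Read exactly 16 bytes starting at offset 12, without moving the stream position.
    in.readFully(12, buf);
    System.out.println(new String(buf));
    in.close();
    fileSystem.close();
}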
public void testWriteData() throws IOException {
    // Create /yy.jpg in HDFS; 'false' means fail if the file already exists.
    FSDataOutputStream out = fileSystem.create(new Path("/yy.jpg"), false);
    // As above, Java does not expand "~"; use an absolute local path in practice.
    FileInputStream in = new FileInputStream("~/Download/wechatpic_20190309221605.jpg");
    byte[] buf = new byte[1024];
    int read = 0;
    while ((read = in.read(buf)) != -1) {
        out.write(buf, 0, read);
    }
    in.close();
    out.close();
    fileSystem.close();
}
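To add data to the end of an existing file rather than create a new one, FileSystem.append can be used, provided the cluster permits appends; a minimal sketch with a hypothetical target file:

public void testAppendData() throws IOException {
    // Open an existing file for appending; this fails if the file does not exist.
    FSDataOutputStream out = fileSystem.append(new Path("/test.txt"));
    out.write("appended line\n".getBytes("utf-8"));
    out.close();
    fileSystem.close();
}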