import contextlib

import pyhdfs


class HdfsUtil:
    """Thin convenience wrapper around ``pyhdfs.HdfsClient``.

    One client is built in the constructor and every method delegates to it.
    The client stays reachable as ``self.fs`` (or via ``get_fs()``) for
    operations that are not wrapped here.
    """

    def __init__(self, hosts="namenode1:9870,namenode2:9870", user_name="hdfs"):
        """Build the underlying client.

        Args:
            hosts: Passed unchanged as ``hosts`` to ``pyhdfs.HdfsClient``;
                the default is a comma-separated ``host:port`` list.
            user_name: Passed unchanged as ``user_name`` to the client.
        """
        self.hosts = hosts
        self.user_name = user_name
        self.fs = pyhdfs.HdfsClient(hosts=self.hosts, user_name=self.user_name)

    def listdir(self, dir_name):
        """Return the client's ``listdir`` result for ``dir_name``."""
        return self.fs.listdir(dir_name)

    def copy_from_local(self, local_file, hdfs_file):
        """Upload ``local_file`` to ``hdfs_file`` via the client."""
        return self.fs.copy_from_local(local_file, hdfs_file)

    def copy_to_local(self, hdfs_file, local_file):
        """Download ``hdfs_file`` to ``local_file`` via the client."""
        return self.fs.copy_to_local(hdfs_file, local_file)

    def delete(self, hdfs_file, recursive=True):
        """Delete ``hdfs_file`` and return the client's result.

        Note that ``recursive`` defaults to True here and is forwarded
        as-is to the client's ``delete``.
        """
        return self.fs.delete(hdfs_file, recursive=recursive)

    def exists(self, hdfs_file):
        """Return the client's ``exists`` result for ``hdfs_file``."""
        return self.fs.exists(hdfs_file)

    def get_fs(self):
        """Return the underlying ``pyhdfs.HdfsClient`` (same as ``self.fs``)."""
        return self.fs

    def create(self, hdfs_file):
        """Create ``hdfs_file`` with empty content and return the client's result."""
        return self.fs.create(hdfs_file, b"")

    def read(self, hdfs_file) -> str:
        """Return the full content of ``hdfs_file`` decoded as UTF-8.

        The handle returned by the client's ``open`` is closed on exit,
        including when reading or decoding raises.
        """
        with contextlib.closing(self.fs.open(hdfs_file)) as f:
            # Read and decode once instead of decoding line by line and
            # joining the pieces; the resulting text is the same.
            return f.read().decode("utf8")

    def write(self, hdfs_file, data: bytes) -> None:
        """Append ``data`` to ``hdfs_file``, creating the file if it is missing.

        Args:
            hdfs_file: Target path on HDFS.
            data: Bytes to store.
        """
        if self.exists(hdfs_file):
            self.fs.append(hdfs_file, data)
        else:
            # Create the file with the payload directly rather than creating
            # it empty and appending afterwards: one request instead of two.
            self.fs.create(hdfs_file, data)
# Original article: https://www.cnblogs.com/wangbin2188/p/14591230.html