通过Java直接读取HDFS中的文件时,一定会用到FSDataInputStream类。通过FSDataInputStream以流的形式从HDFS读取数据的代码如下:
import java.io.IOException; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class FileReadFromHdfs { public static void main(String[] args) { try { String dsf = "hdfs://hadoop1:9000/tmp/wordcount/kkk.txt"; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(dsf),conf); FSDataInputStream hdfsInStream = fs.open(new Path(dsf)); byte[] ioBuffer = new byte[1024]; int readLen = hdfsInStream.read(ioBuffer); while(readLen!=-1) { System.out.write(ioBuffer, 0, readLen); readLen = hdfsInStream.read(ioBuffer); } hdfsInStream.close(); fs.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
原文:http://www.cnblogs.com/gaopeng527/p/4992579.html