First, prepare the program code: the Mapper, the Reducer, the driver class containing main(), and the Maven dependencies.
WCMapper.java:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Read one line of input
        String line = value.toString();
        // Split the line into words
        String[] words = line.split(" ");
        // For each occurrence of a word, emit (word, 1)
        for (String w : words) {
            context.write(new Text(w), new LongWritable(1));
        }
    }
}

WCReduce.java:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> v2s, Context context)
            throws IOException, InterruptedException {
        // Define a counter and accumulate the values for this key
        long counter = 0;
        for (LongWritable i : v2s) {
            counter += i.get(); // get() returns the underlying long
        }
        // Emit (word, total count)
        context.write(key, new LongWritable(counter));
    }
}

WordCount.java:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        // The class containing main(); used to locate the jar on the cluster
        job.setJarByClass(WordCount.class);

        // Mapper-related settings
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/words.txt"));

        // Reducer-related settings
        job.setReducerClass(WCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/wcout666"));

        // Submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}

pom.xml (dependencies):

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.9.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.9.0</version>
    </dependency>
</dependencies>
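To make the data flow concrete, here is a small hypothetical run (the file contents below are made up for illustration). The mapper emits a (word, 1) pair for every word; the reducer sums those ones per word; output pairs are tab-separated and sorted by key. If /words.txt contains:

hello world
hello hadoop

the job's output is:

hadoop	1
hello	2
world	1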
The waitForCompletion(true) call at the end of main() submits the job; passing true means progress and status information is printed to the user as the job runs.
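A common refinement, not used in the code above but conventional in Hadoop driver classes, is to turn the boolean returned by waitForCompletion into the process exit code:

// waitForCompletion(true) returns true if the job succeeded;
// exit with 0 on success and 1 on failure so scripts can check the result
System.exit(job.waitForCompletion(true) ? 0 : 1);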
The JDK version used to compile must match the JDK version on the cluster.
Package with Maven.
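Assuming a standard Maven project layout, a minimal packaging command is:

mvn clean package

This produces the jar under the target/ directory.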
Alternatively, in IDEA, first configure the artifact: File -> Project Structure -> Artifacts. Then package it: Build -> Build Artifacts... -> Build to generate the jar.
Run
hadoop jar xxxx.jar WordCount
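Before submitting, the input file must already exist in HDFS, and afterwards the result can be inspected; a minimal sketch (part-r-00000 is the default output file name for a single-reducer job):

hadoop fs -put words.txt /words.txt      # upload input to the path hard-coded in the driver
hadoop fs -cat /wcout666/part-r-00000    # after the job finishes, view the counts

Note that FileOutputFormat requires the output directory (/wcout666 here) not to exist yet; remove it with hadoop fs -rm -r /wcout666 before re-running the job.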
Source: https://www.cnblogs.com/tangsonghuai/p/11029167.html