package com.eric.hadoop.map;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
 * Mapper of the max-temperature job. For every fixed-width input line it reads
 * the year and the signed air temperature and emits a (year, temperature) pair
 * when the reading is usable.
 *
 * <p>Character offsets (0-based) read from each line: year at [15, 19),
 * signed temperature at [87, 92), quality code at [92, 93).
 */
public class MaxTemperatureMapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable> {

    /** Temperature value that is treated as "no reading" and never emitted. */
    private static final int MISSING = 9999;

    /** Shortest line that still contains the quality code at offset 92. */
    private static final int MIN_RECORD_LENGTH = 93;

    /** Quality codes accepted for a reading; any other code drops the record. */
    private static final String VALID_QUALITY_CODES = "01459";

    /** Counter group/name used to report records that could not be parsed. */
    private static final String COUNTER_GROUP = "MaxTemperature";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /**
     * Parses one input line and emits (year, temperature) if the reading is
     * present and its quality code is accepted.
     *
     * <p>Lines that are too short to hold all fields, or whose temperature
     * field is not a number, are skipped and counted instead of failing the
     * whole task.
     *
     * @param fileOffset key supplied by the input format (not used here)
     * @param lineRecord one line of input text
     * @param output     collector receiving the (year, temperature) pairs
     * @param reporter   used only to count skipped records; may be null
     * @throws IOException if the collector fails to accept the pair
     */
    public void map(LongWritable fileOffset, Text lineRecord,
            OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
        String line = lineRecord.toString();
        // Per-record trace line written to the task's standard output.
        System.out.println("##Processing Record:" + line);

        // Every offset below is fixed, so a short line (e.g. a blank trailing
        // line) would otherwise raise StringIndexOutOfBoundsException.
        if (line.length() < MIN_RECORD_LENGTH) {
            countMalformed(reporter);
            return;
        }

        String year = line.substring(15, 19);

        int temperature;
        try {
            // A leading '+' is stripped explicitly; a leading '-' is left in
            // place and handled by parseInt.
            int start = (line.charAt(87) == '+') ? 88 : 87;
            temperature = Integer.parseInt(line.substring(start, 92));
        } catch (NumberFormatException e) {
            // Corrupt temperature field: skip this record, keep the job alive.
            countMalformed(reporter);
            return;
        }

        // Single-character lookup, equivalent to matching the regex [01459].
        boolean qualityOk = VALID_QUALITY_CODES.indexOf(line.charAt(92)) >= 0;
        if (temperature != MISSING && qualityOk) {
            output.collect(new Text(year), new IntWritable(temperature));
        }
    }

    /** Increments the malformed-record counter when a reporter is available. */
    private static void countMalformed(Reporter reporter) {
        if (reporter != null) {
            reporter.incrCounter(COUNTER_GROUP, MALFORMED_COUNTER, 1);
        }
    }
}
package com.eric.hadoop.reduce;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
/**
 * Reducer of the max-temperature job: collapses all temperatures received for
 * one key into the single highest value.
 */
public class MaxTemperatureReduce extends MapReduceBase implements
        Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Emits (year, highest temperature) for the given key.
     *
     * <p>If the iterator yields no values, {@link Integer#MIN_VALUE} is emitted.
     *
     * @param year         key under which the temperatures were grouped
     * @param temperatures values collected for that key
     * @param output       collector receiving the single result pair
     * @param arg3         reporter supplied by the framework (not used here)
     * @throws IOException if the collector fails to accept the pair
     */
    public void reduce(Text year, Iterator<IntWritable> temperatures,
            OutputCollector<Text, IntWritable> output, Reporter arg3) throws IOException {
        // Trace line; note it prints the iterator object itself, not its contents.
        System.out.println("##Processing temperatures:" + temperatures);

        int highest = Integer.MIN_VALUE;
        while (temperatures.hasNext()) {
            int reading = temperatures.next().get();
            if (reading > highest) {
                highest = reading;
            }
        }

        IntWritable result = new IntWritable(highest);
        output.collect(year, result);
    }
}
package com.eric.hadoop.jobconfig;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import com.eric.hadoop.map.MaxTemperatureMapper;
import com.eric.hadoop.reduce.MaxTemperatureReduce;
/**
 * Driver of the max-temperature job: wires {@link MaxTemperatureMapper} and
 * {@link MaxTemperatureReduce} into a {@link JobConf} and submits it.
 */
public class MaxTemperature {

    /**
     * Configures and runs the job.
     *
     * @param args exactly two arguments: {@code args[0]} is handed to
     *             {@code FileInputFormat.addInputPaths} as the input path(s),
     *             {@code args[1]} is the output path
     * @throws IOException propagated from {@code JobClient.runJob}
     */
    public static void main(String[] args) throws IOException {
        // Validate the command line before building any job state.
        if (args.length != 2) {
            System.err.println("Must contain 2 params:inputPath OutputPath");
            // Non-zero status so shells and schedulers see the failure.
            System.exit(1);
        }

        JobConf conf = new JobConf(MaxTemperature.class);
        conf.setJobName("Get Max Temperature!");

        FileInputFormat.addInputPaths(conf, args[0]);
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        conf.setMapperClass(MaxTemperatureMapper.class);
        conf.setReducerClass(MaxTemperatureReduce.class);

        // Key/value types written by the job (year -> temperature).
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        JobClient.runJob(conf);
    }
}
1. 确保 /etc/hosts 中的主机名与 /etc/sysconfig/network 中的 HOSTNAME 保持一致,修改对应的文件后重启系统
原文:http://blog.csdn.net/eric_sunah/article/details/41515085