1. 测试文件生成程序,参考
MapReduce程序示例如下:
2. 新建Maven项目 hadoop
3. pom.xml
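```xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.java</groupId>
    <artifactId>hadoop</artifactId>
    <version>1.0.0</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.2.0</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>${project.artifactId}</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.0</version>
            </plugin>
        </plugins>
    </build>
</project>
```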
4. MaxMapper.java
package com.java.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Maps each input line to a (year, number) pair so that records are grouped by year.
 *
 * <p>The year is taken from characters 0-3 of the line and the number from
 * characters 8-11. Lines that are too short or whose number field is not a valid
 * integer are skipped and counted in the {@code MaxMapper/MALFORMED_RECORDS} counter
 * instead of failing the whole task.
 *
 * @author Logan
 * @createDate 2019-03-18
 * @version 1.0.0
 */
public class MaxMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** End index (exclusive) of the year field; the field starts at index 0. */
    private static final int YEAR_END = 4;

    /** Start index (inclusive) of the number field. */
    private static final int NUM_START = 8;

    /** End index (exclusive) of the number field; also the minimum usable line length. */
    private static final int NUM_END = 12;

    private static final String COUNTER_GROUP = "MaxMapper";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /**
     * Emits {@code (year, number)} for one input line.
     *
     * @param key     the input key (not used)
     * @param value   one line of input text
     * @param context the task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();

        // Blank or truncated lines cannot contain both fields; skip them rather
        // than letting substring() throw and abort the task.
        if (line.length() < NUM_END) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        int num;
        try {
            num = Integer.parseInt(line.substring(NUM_START, NUM_END));
        } catch (NumberFormatException e) {
            // Non-numeric number field: record it in the counter and move on.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String year = line.substring(0, YEAR_END);
        context.write(new Text(year), new IntWritable(num));
    }
}
5. MaxReducer.java
package com.java.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Computes the maximum value among all numbers emitted for each year.
 *
 * @author Logan
 * @createDate 2019-03-18
 * @version 1.0.0
 */
public class MaxReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Writes {@code (key, max)} where {@code max} is the largest value in {@code values}.
     *
     * @param key     the grouping key (the year emitted by the mapper)
     * @param values  all numbers emitted for this key
     * @param context the task context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Start from the smallest int so that any real value replaces it.
        int max = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            max = Math.max(max, value.get());
        }
        context.write(key, new IntWritable(max));
    }
}
6. MaxJob.java
package com.java.mapreduce;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;/** * 主程序入口类 * * @author Logan * @createDate 2019-03-18 * @version 1.0.0 * */public class MaxJob { public static void main(String[] args) { try { Job job = Job.getInstance(); job.setJarByClass(MaxJob.class); job.setJobName("Get Max"); // 输入第一个参数为文件输入路径 FileInputFormat.addInputPath(job, new Path(args[0])); // 输入第二个参数为输出结果文件路径 FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(MaxMapper.class); job.setReducerClass(MaxReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.waitForCompletion(true); } catch (Exception e) { e.printStackTrace(); } }}
.