MapReduce案例-上行流量倒序排序(递减排序)
需求:按照上行流量倒序排序(递减排序)
分析:自定义FlowBean,以FlowBean为map输出的key,以手机号作为Map输出的value,因为MapReduce程序会对Map阶段输出的key进行排序。
原始数据:
从左到右分别为
手机号 上行流量 下行流量 上行数据包 下行数据包
1348025310431803180135024688235711034910273351356043965833589224203413600217502372037042662257136028465651529101219381366057799124690969601371919941940024013726230503242468127248113760778710212021201382307000161803360138265441014002641392231446612372012300813925057413694824363110581392625110640024013926435656215124132150136858582835382736591592013325720293620315615989002119318031938182115759611521061215271832017338221241218953119984138413201432164116
Step 1: 定义FlowBean实现WritableComparable实现比较排序
Java 的 compareTo 方法说明:
- compareTo 方法用于将当前对象与方法的参数进行比较。
- 如果指定的数与参数相等返回 0。
- 如果当前对象小于参数,返回一个负数(不保证一定是 -1)。
- 如果当前对象大于参数,返回一个正数(不保证一定是 1)。
例如:o1.compareTo(o2); 返回正数的话,当前对象(调用 compareTo 方法的对象 o1)要排在比较对象(compareTo 传参对象 o2)后面;返回负数的话,排在前面。如果比较的值均为数字,可以用"比较对象减去当前对象"的方式:返回正数说明比较对象更大,应排在当前对象前面。通过这种方式我们得到的即为递减排序。(注意:直接相减可能发生整型溢出,更稳妥的写法是 Integer.compare(o2, o1)。)
package org.example.mapreduce.FlowSort;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Flow record used as the map-output key so that the MapReduce framework
 * sorts records during the shuffle phase.
 *
 * <p>Sort order: descending by {@code upFlow} (see {@link #compareTo}).
 * NOTE(review): the tutorial text says "sort by upstream traffic", but the
 * field compared here is documented as the upstream <em>packet count</em> —
 * confirm which column is intended.
 */
public class FlowSortBean implements WritableComparable<FlowSortBean> {

    private Integer upFlow;        // upstream packet count (上行数据包数)
    private Integer downFlow;      // downstream packet count (下行数据包数)
    private Integer upCountFlow;   // total upstream traffic (上行流量总和)
    private Integer downCountFlow; // total downstream traffic (下行流量总和)

    public Integer getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(Integer upFlow) {
        this.upFlow = upFlow;
    }

    public Integer getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(Integer downFlow) {
        this.downFlow = downFlow;
    }

    public Integer getUpCountFlow() {
        return upCountFlow;
    }

    public void setUpCountFlow(Integer upCountFlow) {
        this.upCountFlow = upCountFlow;
    }

    public Integer getDownCountFlow() {
        return downCountFlow;
    }

    public void setDownCountFlow(Integer downCountFlow) {
        this.downCountFlow = downCountFlow;
    }

    /** Tab-separated field dump; this is the exact text emitted as V3. */
    @Override
    public String toString() {
        // BUG FIX: the original was missing every '+' operator and did not compile.
        return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
    }

    /**
     * Descending order by upFlow: the larger upFlow sorts first.
     *
     * <p>BUG FIX: the original used {@code other.upFlow - this.upFlow}, which
     * can overflow int for extreme values; {@link Integer#compare} is the
     * overflow-safe idiom.
     */
    @Override
    public int compareTo(FlowSortBean flowSortBean) {
        return Integer.compare(flowSortBean.upFlow, this.upFlow);
    }

    /** Hadoop serialization: field order must match {@link #readFields}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(upFlow);
        dataOutput.writeInt(downFlow);
        dataOutput.writeInt(upCountFlow);
        dataOutput.writeInt(downCountFlow);
    }

    /** Hadoop deserialization: must read fields in the order written above. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readInt();
        this.downFlow = dataInput.readInt();
        this.upCountFlow = dataInput.readInt();
        this.downCountFlow = dataInput.readInt();
    }
}
Step 2: 定义FlowSortMapper类
package org.example.mapreduce.FlowSort;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map stage: parses one tab-separated input line into a {@link FlowSortBean}
 * and emits it as the key (so the framework sorts on it), with the phone
 * number as the value.
 *
 * <p>Expected input columns: phone \t upFlow \t downFlow \t upCountFlow \t downCountFlow.
 * (FIX: removed the unused import of org.example.mapreduce.Flow.FlowBean.)
 */
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowSortBean, Text> {

    /**
     * @param key     byte offset of the line within the input split (unused)
     * @param value   one tab-separated record
     * @param context Hadoop output collector
     * @throws NumberFormatException if a numeric column is not a valid int
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the raw line into its five columns.
        String[] split = value.toString().split("\t");
        String phone = split[0];

        // Build the composite sort key from the four numeric columns.
        FlowSortBean flowSortBean = new FlowSortBean();
        flowSortBean.setUpFlow(Integer.parseInt(split[1]));
        flowSortBean.setDownFlow(Integer.parseInt(split[2]));
        flowSortBean.setUpCountFlow(Integer.parseInt(split[3]));
        flowSortBean.setDownCountFlow(Integer.parseInt(split[4]));

        // K2 = bean (drives the shuffle-phase sort), V2 = phone number.
        context.write(flowSortBean, new Text(phone));
    }
}
Step 3:自定义FlowReducer类
package org.example.mapreduce.FlowSort;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce stage: receives keys already sorted by {@link FlowSortBean#compareTo}
 * and swaps key/value so the final output is "phone \t flow-fields".
 *
 * <p>After the map phase the grouped input looks like:
 * <pre>
 *   K2                 V2
 *   flowSortBean1  ->  [phone1, phone2, phone3]
 *   flowSortBean2  ->  [phone4, phone5]
 * </pre>
 * so each phone in the group is written out paired with the shared bean.
 */
public class FlowSortReducer extends Reducer<FlowSortBean, Text, Text, FlowSortBean> {

    @Override
    protected void reduce(FlowSortBean key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Emit one output record per phone number that shares this flow key.
        for (Text phone : values) {
            context.write(phone, key);
        }
    }
}
Step 4: 程序main函数入口FlowSortMain
package org.example.mapreduce.FlowSort;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class FlowSortJobMain extends Configured implements Tool { @Override public int run(String[] strings) throws Exception { //步骤一:创建一个Job任务对象 Job job = Job.getInstance(super.getConf(),"mapreduce_flowSort"); //步骤二:配置JOb任务对象 //1,指定文件的读取方式和读取路径 job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job,new Path("file:///C:\\Myprogram\\IN\\FlowCount")); //2,指定map阶段的处理方式和数据类型 job.setMapperClass(FlowSortMapper.class); //设置map阶段K2的数据类型 job.setMapOutputKeyClass(FlowSortBean.class); //设置map阶段V2的数据类型 job.setMapOutputValueClass(Text.class); //3,分区 4,排序 5,规约 6,分组 //7,设置reduce阶段的处理方式和数据类型 job.setReducerClass(FlowSortReducer.class); //设置reduce阶段的K3的数据类型 job.setOutputKeyClass(Text.class); //设置reduce阶段的V3的数据类型 job.setOutputValueClass(FlowSortBean.class); //8,设置输出路径 job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job,new Path("file:///C:\\Myprogram\\OUT\\flowsort_out5")); boolean over = job.waitForCompletion(true); return over ? 0 : 1; } public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); int run = ToolRunner.run(configuration,new FlowSortJobMain(),args); System.exit(run); }}
完美撒花!
赞 (0)