MapReduce案例-上行流量倒序排序(递减排序)

需求:按照上行流量倒序排序(递减排序)
分析:自定义FlowBean,以FlowBean为map输出的key,以手机号作为Map输出的value,因为MapReduce程序会对Map阶段输出的key进行排序。
原始数据:
从左到右分别为
手机号 上行流量 下行流量 上行数据包 下行数据包

1348025310431803180135024688235711034910273351356043965833589224203413600217502372037042662257136028465651529101219381366057799124690969601371919941940024013726230503242468127248113760778710212021201382307000161803360138265441014002641392231446612372012300813925057413694824363110581392625110640024013926435656215124132150136858582835382736591592013325720293620315615989002119318031938182115759611521061215271832017338221241218953119984138413201432164116

Step 1: 定义FlowBean实现WritableComparable实现比较排序
Java 的 compareTo 方法说明:

  • compareTo 方法用于将当前对象与方法的参数进行比较。
  • 如果指定的数与参数相等返回 0。
  • 如果指定的数小于参数返回 -1。
  • 如果指定的数大于参数返回 1。

例如:o1.compareTo(o2); 返回正数的话,当前对象(调用 compareTo 方法的对象 o1)要排在比较对象(compareTo 传参对象 o2)后面,返回负数的话,放在前面。如果我们比较的值均为数字则可以直接相减,比较对象减去当前对象,返回正数的话比较对象更大,放在当前对象前。通过这种方式我们的到的即为递减排序。

package org.example.mapreduce.FlowSort;import org.apache.hadoop.io.WritableComparable;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;public class FlowSortBean implements WritableComparable <FlowSortBean>{    private Integer upFlow; // 上行数据包数    private Integer downFlow; //下行数据包数    private Integer upCountFlow; //上行流量总和    private Integer downCountFlow; //下行流量总和    public Integer getUpFlow() { return upFlow; }    public void setUpFlow(Integer upFlow) {        this.upFlow = upFlow;    }    public Integer getDownFlow() {        return downFlow;    }    public void setDownFlow(Integer downFlow) {        this.downFlow = downFlow;    }    public Integer getUpCountFlow() {        return upCountFlow;    }    public void setUpCountFlow(Integer upCountFlow) {        this.upCountFlow = upCountFlow;    }    public Integer getDownCountFlow() {        return downCountFlow;    }    public void setDownCountFlow(Integer downCountFlow) {        this.downCountFlow = downCountFlow;    }    @Override    public String toString() {        return upFlow   "\t"   downFlow   "\t"   upCountFlow   "\t"  downCountFlow;    }    //排序    @Override    public int compareTo(FlowSortBean flowSortBean) {        return  flowSortBean.upFlow - this.upFlow ;    }    //序列化    @Override    public void write(DataOutput dataOutput) throws IOException {        dataOutput.writeInt(upFlow);        dataOutput.writeInt(downFlow);        dataOutput.writeInt(upCountFlow);        dataOutput.writeInt(downCountFlow);    }    //反序列化    @Override    public void readFields(DataInput dataInput) throws IOException {        this.upFlow = dataInput.readInt();        this.downFlow = dataInput.readInt();        this.upCountFlow = dataInput.readInt();        this.downCountFlow = dataInput.readInt();    }}

Step 2: 定义FlowSortMapper类

package org.example.mapreduce.FlowSort;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import org.example.mapreduce.Flow.FlowBean;import java.io.IOException;public class FlowSortMapper extends Mapper<LongWritable, Text,FlowSortBean,Text> {    @Override    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {        //K1:偏移量        //V1: 一行字符串,首先对字符串进行切分        String[] split = value.toString().split("\t");        String phone = split[0];        //创建FlowBean对象        FlowSortBean flowSortBean = new FlowSortBean();        //将字符串转换为数字        flowSortBean.setUpFlow(Integer.parseInt(split[1]));        flowSortBean.setDownFlow(Integer.parseInt(split[2]));        flowSortBean.setUpCountFlow(Integer.parseInt(split[3]));        flowSortBean.setDownCountFlow(Integer.parseInt(split[4]));        context.write(flowSortBean,new Text(phone));    }}

Step 3:自定义FlowReducer类

package org.example.mapreduce.FlowSort;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;public class FlowSortReducer extends Reducer<FlowSortBean,Text,Text,FlowSortBean> {    @Override    protected void reduce(FlowSortBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {        //在经历过mapper之后我们的数据  :        //      k2                   V2        //  flowSortBean1   <手机号1,手机号2,手机号3>        //  flowSortBean2   <手机号4,手机号5>        //因此我们对Value进行遍历,写入到上下文。        for (Text value:values) {            context.write(value, key);        }    }}

Step 4: 程序main函数入口FlowSortMain

package org.example.mapreduce.FlowSort;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class FlowSortJobMain extends Configured implements Tool {    @Override    public int run(String[] strings) throws Exception {        //步骤一:创建一个Job任务对象        Job job = Job.getInstance(super.getConf(),"mapreduce_flowSort");        //步骤二:配置JOb任务对象        //1,指定文件的读取方式和读取路径        job.setInputFormatClass(TextInputFormat.class);        TextInputFormat.addInputPath(job,new Path("file:///C:\\Myprogram\\IN\\FlowCount"));        //2,指定map阶段的处理方式和数据类型        job.setMapperClass(FlowSortMapper.class);        //设置map阶段K2的数据类型        job.setMapOutputKeyClass(FlowSortBean.class);        //设置map阶段V2的数据类型        job.setMapOutputValueClass(Text.class);        //3,分区 4,排序 5,规约 6,分组        //7,设置reduce阶段的处理方式和数据类型        job.setReducerClass(FlowSortReducer.class);        //设置reduce阶段的K3的数据类型        job.setOutputKeyClass(Text.class);        //设置reduce阶段的V3的数据类型        job.setOutputValueClass(FlowSortBean.class);        //8,设置输出路径        job.setOutputFormatClass(TextOutputFormat.class);        TextOutputFormat.setOutputPath(job,new Path("file:///C:\\Myprogram\\OUT\\flowsort_out5"));        boolean over = job.waitForCompletion(true);        return over ? 0 : 1;    }    public static void main(String[] args) throws Exception {        Configuration configuration = new Configuration();        int run = ToolRunner.run(configuration,new FlowSortJobMain(),args);        System.exit(run);    }}

完美撒花!

来源:https://www.icode9.com/content-4-840151.html

(0)

相关推荐