Traffic statistics of MapReduce

Posted by Hellomonkey on Tue, 19 Nov 2019 18:39:37 +0100


Preparation

  1. A virtual machine with Hadoop installed
  2. A traffic data file (flow.log)

Start Hadoop

start-all.sh
Upload the flow.log file
hadoop fs -put /flow.log /

pojo layer

import lombok.Data;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

//Writable is the Hadoop interface for objects that can be serialized and deserialized
@Data
public class TelBean implements Writable {
    private String tel;
    private Long up;
    private Long down;
    private Long total;

    public TelBean() {
    }

    public TelBean(String tel, Long up, Long down) {
        this.tel = tel;
        this.up = up;
        this.down = down;
        this.total = up + down;
    }

    //serialize
    public void write(DataOutput out) throws IOException {
        out.writeUTF(tel);
        out.writeLong(up);
        out.writeLong(down);
        out.writeLong(total);
    }

    //deserialize
    public void readFields(DataInput in) throws IOException {
        this.tel = in.readUTF();
        this.up = in.readLong();
        this.down = in.readLong();
        this.total = in.readLong();
    }
}
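One detail worth noting: when the reducer writes a TelBean as the output value, TextOutputFormat calls its toString(), and Lombok's @Data generates one of the form TelBean(tel=..., up=..., down=..., total=...). If you want tab-separated columns in the result file instead (the sort sketches later in this post assume that), an override like the following can be added inside TelBean; it is a suggestion, not part of the code above:

    //Tab-separated output so the result file is easy to read and to feed into a second job
    @Override
    public String toString() {
        return this.up + "\t" + this.down + "\t" + this.total;
    }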

mapper layer

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class TCMapper extends Mapper<LongWritable, Text, Text, TelBean> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] split = line.split("\t");//Each column is tab separated
        String tel = split[1];
        Long up = Long.parseLong(split[split.length - 3]);
        Long down = Long.parseLong(split[split.length - 2]);
        TelBean telBean = new TelBean(tel, up, down);
        context.write(new Text(tel),telBean);
    }
}
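The split indices above assume a tab-separated record in which the phone number is the second column and the uplink and downlink byte counts are the third-to-last and second-to-last columns. An illustrative line (made up for this example, not taken from the real flow.log) might look like:

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	www.example.com	24	27	2481	24681	200

Here split[1] is the phone number, split[split.length - 3] = 2481 is the uplink traffic and split[split.length - 2] = 24681 is the downlink traffic.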

reducer layer

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

public class TCReducer extends Reducer<Text, TelBean, Text, TelBean> {
    @Override
    protected void reduce(Text key, Iterable<TelBean> values, Context context) throws IOException, InterruptedException {
        Long up = 0L;
        Long down = 0L;
        for (TelBean telBean:values) {
            up = up + telBean.getUp();
            down = down + telBean.getDown();
        }
        TelBean telBean = new TelBean(key.toString(),up,down);
        context.write(key,telBean);
    }
}

partition layer

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

//The two type parameters are the mapper's output key and value types (partitioning runs on the map output)
public class MyPartition extends Partitioner<Text, TelBean> {
    @Override
    public int getPartition(Text text, TelBean telBean, int numPartitions) {
        String tel = text.toString();
        if(tel.startsWith("138")||tel.startsWith("136")){
            return 1;
        }else{
            return 0;
        }
    }
}

job layer

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TCJob {
    public static void main(String[] args) throws Exception {
        //1. Create the configuration and the job instance
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        //2. Specify the class used by the job
        job.setJarByClass(TCJob.class);
        //3. Set the mapper's class and properties
        job.setMapperClass(TCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(TelBean.class);
        //4. Set the class and property of reducer
        job.setReducerClass(TCReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TelBean.class);
        //Set partition
        job.setPartitionerClass(MyPartition.class);
        job.setNumReduceTasks(2);
        //5. Set dynamic transfer parameters of input file when calling
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //6. Set output directory
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //7. Submit task
        job.waitForCompletion(true);
    }
}

Package it into a jar and upload it to the server
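Assuming a standard Maven project (the artifact name hdfs1-1.0-SNAPSHOT.jar suggests one), packaging and uploading typically looks like this; the target host is a placeholder:

mvn clean package
scp target/hdfs1-1.0-SNAPSHOT.jar root@hadoop-master:~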

hadoop jar hdfs1-1.0-SNAPSHOT.jar com.zhiyou100.job.TCJob /flow.log /a3

Check the contents of the two output files
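With setNumReduceTasks(2), the /a3 output directory should contain one part file per reduce task, which can be inspected like this:

hadoop fs -ls /a3
hadoop fs -cat /a3/part-r-00000
hadoop fs -cat /a3/part-r-00001

Phone numbers starting with 138 or 136 end up in part-r-00001 (partition 1), everything else in part-r-00000 (partition 0).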

summary

mapper layer: responsible for cleaning the incoming data and converting it into the key-value pairs we need to process
reducer layer: responsible for aggregating and processing the data
job layer: responsible for wiring the whole job together and controlling how it executes
Note: both sorting and partitioning operate on the key

Sort by total traffic

First compute the uplink, downlink and total traffic per phone number without partitioning, then run a second job that sorts the generated results by total traffic

pojo layer

Revised as follows
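A minimal sketch of the likely revision, assuming the bean now implements WritableComparable so that the shuffle phase sorts records by total traffic in descending order:

import lombok.Data;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

//WritableComparable adds compareTo on top of Writable, so the bean can be used as a sort key
@Data
public class TelBean implements WritableComparable<TelBean> {
    private String tel;
    private Long up;
    private Long down;
    private Long total;

    public TelBean() {
    }

    public TelBean(String tel, Long up, Long down) {
        this.tel = tel;
        this.up = up;
        this.down = down;
        this.total = up + down;
    }

    //serialize
    public void write(DataOutput out) throws IOException {
        out.writeUTF(tel);
        out.writeLong(up);
        out.writeLong(down);
        out.writeLong(total);
    }

    //deserialize
    public void readFields(DataInput in) throws IOException {
        this.tel = in.readUTF();
        this.up = in.readLong();
        this.down = in.readLong();
        this.total = in.readLong();
    }

    //sort by total traffic, largest first
    @Override
    public int compareTo(TelBean other) {
        return other.getTotal().compareTo(this.total);
    }

    //tab-separated output (see the note in the first section)
    @Override
    public String toString() {
        return this.up + "\t" + this.down + "\t" + this.total;
    }
}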

mapper layer

Revised as follows
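A sketch of the revised mapper, assuming it reads the first job's result lines in the form tel \t up \t down \t total and emits the whole bean as the key so that the framework sorts on it (SortMapper is a hypothetical class name):

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class SortMapper extends Mapper<LongWritable, Text, TelBean, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //Each result line of the first job: tel \t up \t down \t total
        String[] split = value.toString().split("\t");
        String tel = split[0];
        Long up = Long.parseLong(split[1]);
        Long down = Long.parseLong(split[2]);
        //The bean is the key, so the shuffle sorts records by its compareTo (total traffic)
        context.write(new TelBean(tel, up, down), NullWritable.get());
    }
}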

reducer layer
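A sketch of the matching reducer: the records arrive already sorted by total traffic, so it only writes them back out with the phone number restored as the output key (SortReducer is a hypothetical class name):

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

public class SortReducer extends Reducer<TelBean, NullWritable, Text, TelBean> {
    @Override
    protected void reduce(TelBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        //Iterate over the values so that phones with the same total are all written
        //(the framework refills the key object for each value)
        for (NullWritable value : values) {
            context.write(new Text(key.getTel()), key);
        }
    }
}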

partition layer
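Presumably no custom partitioner is needed for the sort job: leaving the default single reduce task produces one globally sorted result file, which is what the job sketch below assumes.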

job layer
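A sketch of the driver for the sort job, under the same assumptions (the input path is the output directory of the first job):

import com.zhiyou100.pojo.TelBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SortJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortJob.class);

        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(TelBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TelBean.class);

        //No custom partitioner and a single reduce task: one globally sorted result file
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
}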


Operation result:
