Case 1 of custom HBase MapReduce

Posted by jacobsdad on Wed, 11 Dec 2019 05:50:12 +0100

1. Scenario

Use MapReduce to migrate part of the data in the ys table in HBase to the ys_mr table. As the Mapper below shows, only the info:name and info:color columns of each row are copied.

2. Coding

1) Build the ReadysMapreduce class to read data from the ys table

package cn.ysjh;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Mapper;



public class ReadysMapreduce extends TableMapper<ImmutableBytesWritable, Put> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value,
            Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Put>.Context context)
            throws IOException, InterruptedException {
        // Read one row of the ys table and copy the wanted cells into a Put object
        Put put = new Put(key.get());
        // Traverse the cells of the row
        for (Cell cell : value.rawCells()) {
            // Keep only cells from the column family: info
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                // Column: name
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    // Add the cell to the Put object
                    put.add(cell);
                // Column: color
                } else if ("color".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    // Add the cell to the Put object
                    put.add(cell);
                }
            }
        }
        // Write the assembled Put to the context as the Mapper output
        context.write(key, put);
    }

}
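One caveat worth noting: if a row carries neither an info:name nor an info:color cell, the Put stays empty, and HBase will normally reject an empty Put when it is written to the target table. A minimal guard at the end of map() (a sketch, not part of the original code) could look like this:

        // Sketch: only emit the Put when at least one cell was copied,
        // because an empty Put is rejected when written to the ys_mr table
        if (!put.isEmpty()) {
            context.write(key, put);
        }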

2) Build the WriteysReduce class to write the data read from the ys table into the ys_mr table

package cn.ysjh;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class WriteysReduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values,
            Context context)
            throws IOException, InterruptedException {
        // Write each Put received from the Mapper into the ys_mr table
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
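Since WriteysReduce simply forwards every Put unchanged, the same effect could likely be achieved with HBase's built-in org.apache.hadoop.hbase.mapreduce.IdentityTableReducer; a sketch of the alternative wiring in the driver (an assumption, not what this post does) would be:

        // Sketch: let HBase's identity reducer forward the Puts instead of WriteysReduce
        TableMapReduceUtil.initTableReducerJob(
                "ys_mr",
                org.apache.hadoop.hbase.mapreduce.IdentityTableReducer.class,
                job);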

3) Build the JobysMapreduce class to create the Job

package cn.ysjh;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class JobysMapreduce extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        // Get the Configuration
        Configuration conf = this.getConf();
        // Create the Job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(JobysMapreduce.class);
        // Configure the Scan
        Scan scan = new Scan();
        scan.setCacheBlocks(false);
        scan.setCaching(500);
        // Set the Mapper. Note that the Mapper comes from the mapreduce package,
        // not the mapred package; the latter is the old API
        TableMapReduceUtil.initTableMapperJob(
                "ys",                          // table name of the data source
                scan,                          // Scan controller
                ReadysMapreduce.class,         // Mapper class
                ImmutableBytesWritable.class,  // Mapper output key type
                Put.class,                     // Mapper output value type
                job                            // the Job to configure
        );
        // Set the Reducer
        TableMapReduceUtil.initTableReducerJob("ys_mr", WriteysReduce.class, job);
        // Set the number of reduce tasks, at least 1
        job.setNumReduceTasks(1);
        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            throw new IOException("Job running with error");
        }
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int status = ToolRunner.run(conf, new JobysMapreduce(), args);
        System.exit(status);
    }

}
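An optional refinement, not in the original driver: since the Mapper only copies info:name and info:color, the Scan in run() could be restricted to those two columns so the RegionServers ship less data. A sketch, to be placed before initTableMapperJob (it also requires importing org.apache.hadoop.hbase.util.Bytes in the driver):

        // Sketch: limit the scan to the two columns the Mapper actually copies
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"));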

3. Packaging and running

Package the project with the Maven command -P local clean package, then upload the resulting jar to the cluster and run the test.

Note: if the target table to be written to (ys_mr in this case) does not exist, it must be created in advance.
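For example, the table can be created from the HBase shell with create 'ys_mr', 'info', or programmatically. The following is a minimal sketch using the HBase client Admin API, assuming the same info column family as the source table (the class name CreateYsMrTable is only illustrative):

package cn.ysjh;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateYsMrTable {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("ys_mr");
            // Only create the table if it does not exist yet
            if (!admin.tableExists(tableName)) {
                HTableDescriptor descriptor = new HTableDescriptor(tableName);
                // Same column family as the source ys table
                descriptor.addFamily(new HColumnDescriptor("info"));
                admin.createTable(descriptor);
            }
        }
    }
}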


Topics: Big Data Apache Hadoop HBase Java