HBase custom MapReduce

Posted by brooky on Sun, 03 Nov 2019 19:16:43 +0100

Transfer of HBase table data

In plain Hadoop, an MR job is written against two classes: Mapper and Reducer. To read from and write to HBase tables, we instead extend two HBase-specific subclasses: TableMapper and TableReducer.
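For reference, these base classes simply fix the HBase-facing type parameters of the ordinary Mapper and Reducer; their signatures in the org.apache.hadoop.hbase.mapreduce package look like this:

// TableMapper pins the input key/value to the row key and the Result of a Scan
public abstract class TableMapper<KEYOUT, VALUEOUT>
        extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
}

// TableReducer pins the output value type to a Mutation (e.g. a Put or a Delete)
public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
        extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
}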

Objective: migrate part of the data in the fruit table to the fruit_mr table through MR.
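The job below assumes both tables already exist. If fruit_mr still needs to be created, a minimal sketch using the HBase 1.x Admin API (the table and column family names are the ones used throughout this article; the class name is just for illustration) could look like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateFruitMRTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("fruit_mr");
            if (!admin.tableExists(tableName)) {
                // Same column family as the source table, so the copied cells fit
                HTableDescriptor descriptor = new HTableDescriptor(tableName);
                descriptor.addFamily(new HColumnDescriptor("info"));
                admin.createTable(descriptor);
            }
        }
    }
}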

Step 1. Build the ReadFruitMapper class to read the data in the fruit table

package com.z.hbase_mr;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

public class ReadFruitMapper extends TableMapper<ImmutableBytesWritable, Put> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Extract the name and color columns of fruit: read each row of data
        // and put it into a Put object keyed by the same row key
        Put put = new Put(key.get());

        // Iterate over the cells of the row
        for (Cell cell : value.rawCells()) {
            // Keep only cells from the "info" column family
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                // Keep the "name" column
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    // Add the cell to the Put object
                    put.add(cell);
                // Keep the "color" column
                } else if ("color".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    put.add(cell);
                }
            }
        }

        // Write each row read from fruit to the context as the map output
        context.write(key, put);
    }
}
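A design note: the family/qualifier checks above could instead be pushed into the Scan that drives the mapper (configured in Step 3), so that only the two wanted columns are read and shipped to the map tasks in the first place. A sketch:

// Restrict the scan to the two columns the job actually copies;
// the per-cell checks in the mapper then become redundant
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"));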

 

Step 2. Build the WriteFruitMRReducer class, which writes the rows read from the fruit table into the fruit_mr table

package com.z.hbase_mr;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

public class WriteFruitMRReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Write each row that was read into the fruit_mr table
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
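Because this reducer merely forwards each Put unchanged, the shuffle can be skipped altogether: TableMapReduceUtil accepts a null reducer class. A sketch of a map-only variant of the Step 3 configuration, assuming the mapper keeps emitting (ImmutableBytesWritable, Put):

// Map-only variant: no reducer class and zero reduce tasks; the mapper
// output is written straight to fruit_mr by the TableOutputFormat that
// initTableReducerJob installs
TableMapReduceUtil.initTableReducerJob("fruit_mr", null, job);
job.setNumReduceTasks(0);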

 

Step 3. Build the Fruit2FruitMRJob class (extends Configured implements Tool) to assemble and run the Job

// Assemble the Job
public int run(String[] args) throws Exception {
    // Get the Configuration
    Configuration conf = this.getConf();

    // Create the Job
    Job job = Job.getInstance(conf, this.getClass().getSimpleName());
    job.setJarByClass(Fruit2FruitMRJob.class);

    // Configure the Job: don't pollute the block cache with this full scan,
    // and fetch 500 rows per RPC to cut round trips
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setCaching(500);

    // Set the Mapper. Note: import from the mapreduce package, not the
    // mapred package -- the latter is the old API
    TableMapReduceUtil.initTableMapperJob(
            "fruit",                      // name of the source table
            scan,                         // Scan instance controlling the read
            ReadFruitMapper.class,        // Mapper class
            ImmutableBytesWritable.class, // Mapper output key type
            Put.class,                    // Mapper output value type
            job                           // the Job to configure
    );

    // Set the Reducer
    TableMapReduceUtil.initTableReducerJob("fruit_mr", WriteFruitMRReducer.class, job);

    // Set the number of reduce tasks; at least 1
    job.setNumReduceTasks(1);

    boolean isSuccess = job.waitForCompletion(true);
    if (!isSuccess) {
        throw new IOException("Job running with error");
    }
    return isSuccess ? 0 : 1;
}

 

Step 4. Call ToolRunner in the main method to run the Job.

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int status = ToolRunner.run(conf, new Fruit2FruitMRJob(), args);
    System.exit(status);
}
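For completeness, here is a sketch of how the run method from Step 3 and this main method sit together in one driver class, along with the imports both snippets rely on (run's body is elided to a placeholder here):

package com.z.hbase_mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Fruit2FruitMRJob extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // assemble and run the Job exactly as shown in Step 3
        return 0; // placeholder; replace with the Step 3 body
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int status = ToolRunner.run(conf, new Fruit2FruitMRJob(), args);
        System.exit(status);
    }
}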
