1. Requirement
Use MapReduce to migrate part of the data in the HBase table ys into the ys_mr table.
2. Coding
1) Build the ReadysMapreduce class to read the data in the ys table
package cn.ysjh;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Mapper;

public class ReadysMapreduce extends TableMapper<ImmutableBytesWritable, Put> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value,
            Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Put>.Context context)
            throws IOException, InterruptedException {
        // Each row read from the ys table is packed into a Put object keyed by its row key
        Put put = new Put(key.get());
        // Walk all cells of the row
        for (Cell cell : value.rawCells()) {
            // Keep only the column family "info"
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                // Column "name": add the cell to the Put object
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    put.add(cell);
                // Column "color": add the cell to the Put object
                } else if ("color".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    put.add(cell);
                }
            }
        }
        // Emit the row as the map output
        context.write(key, put);
    }
}
2) Build the WriteysReduce class to write the data read from the ys table into the ys_mr table
package cn.ysjh;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class WriteysReduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Write each row read from the ys table into the ys_mr table
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
3) Build the JobysMapreduce class to create the Job
package cn.ysjh;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class JobysMapreduce extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        // Get the Configuration injected by ToolRunner
        Configuration conf = this.getConf();

        // Create the Job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(JobysMapreduce.class);

        // Configure the scan over the source table
        Scan scan = new Scan();
        scan.setCacheBlocks(false); // don't pollute the block cache with a full-table scan
        scan.setCaching(500);       // fetch 500 rows per RPC

        // Set the Mapper. Note that TableMapReduceUtil must come from the
        // mapreduce package, not the mapred package (the latter is the old API).
        TableMapReduceUtil.initTableMapperJob(
                "ys",                          // source table
                scan,                          // scan controller
                ReadysMapreduce.class,         // Mapper class
                ImmutableBytesWritable.class,  // Mapper output key type
                Put.class,                     // Mapper output value type
                job                            // job to configure
        );

        // Set the Reducer, writing into the ys_mr table
        TableMapReduceUtil.initTableReducerJob("ys_mr", WriteysReduce.class, job);

        // At least one reduce task is required
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            throw new IOException("Job running with error");
        }
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int status = ToolRunner.run(conf, new JobysMapreduce(), args);
        System.exit(status);
    }
}
3. Packaging and running
Package with the Maven command mvn -P local clean package, then upload the resulting jar to the cluster to run the test.
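A typical way to submit the job on the cluster is sketched below; the jar name ys-hbase-mr.jar is only a placeholder, and hbase mapredcp is used to put the HBase jars on the client classpath:

export HADOOP_CLASSPATH=$(hbase mapredcp)
yarn jar ys-hbase-mr.jar cn.ysjh.JobysMapreduce

After the job finishes, scan 'ys_mr' in the HBase shell should show the migrated name and color columns.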
Note: if the target table to be written to (ys_mr here) does not exist, it must be created in advance; see the sketch below.
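The target table needs the same column family, info, that the Puts reference. It can be created in the HBase shell with create 'ys_mr', 'info', or programmatically. The following is a minimal sketch using the same-era Admin API as the code above (the class name CreateYsMrTable is made up for illustration):

package cn.ysjh;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// Hypothetical helper class, not part of the job above
public class CreateYsMrTable {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("ys_mr");
            if (!admin.tableExists(tableName)) {
                // The table must carry the "info" column family that the
                // Puts produced by ReadysMapreduce reference
                HTableDescriptor desc = new HTableDescriptor(tableName);
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
        }
    }
}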