Source code interpretation - (1)hbase client source code | http://aperise.iteye.com/blog/2372350 |
Source code interpretation - (2) HBase examples bufferedmutator example | http://aperise.iteye.com/blog/2372505 |
Source code interpretation - (3) HBase examples multithreadedclientexample | http://aperise.iteye.com/blog/2372534 |
1. Lightweight table, heavyweight connection
MultiThreadedClientExample, another example shipped with hbase-examples, demonstrates a further way of using the HBase client. In this example, Table is a lightweight object: it is created inside a worker task and destroyed when that task finishes, while the Connection behind the Table is not closed by the task. Connection is a heavyweight object that maintains the link to ZooKeeper, asynchronous operations and other state. From this example we can learn how to use the HBase client from multiple threads.
2.MultiThreadedClientExample
- import com.google.common.util.concurrent.ThreadFactoryBuilder;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- import org.apache.hadoop.conf.Configured;
- import org.apache.hadoop.hbase.TableName;
- import org.apache.hadoop.hbase.client.Connection;
- import org.apache.hadoop.hbase.client.ConnectionFactory;
- import org.apache.hadoop.hbase.client.Put;
- import org.apache.hadoop.hbase.client.RegionLocator;
- import org.apache.hadoop.hbase.client.Result;
- import org.apache.hadoop.hbase.client.ResultScanner;
- import org.apache.hadoop.hbase.client.Scan;
- import org.apache.hadoop.hbase.client.Table;
- import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
- import org.apache.hadoop.hbase.util.Bytes;
- import org.apache.hadoop.util.Tool;
- import org.apache.hadoop.util.ToolRunner;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.concurrent.Callable;
- import java.util.concurrent.ExecutorService;
- import java.util.concurrent.Executors;
- import java.util.concurrent.ForkJoinPool;
- import java.util.concurrent.Future;
- import java.util.concurrent.ThreadFactory;
- import java.util.concurrent.ThreadLocalRandom;
- import java.util.concurrent.TimeUnit;
- /**
- * This example shows how to use the HBase client from multiple threads.
- * Table is a lightweight object: it is created inside a worker task and closed when that task finishes, while the Connection behind the Table is not closed by the task.
- * Connection is a heavyweight object that maintains the link to ZooKeeper, asynchronous operations and other state.
- * By default 500000 requests are submitted to the HBase server (about 30% are batch writes, 20% are single-row writes, and 50% are scans).
- *
- */
- public class MultiThreadedClientExample extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(MultiThreadedClientExample.class);
- private static final int DEFAULT_NUM_OPERATIONS = 500000;
- //All reads and writes in this example use column family "d" and column qualifier "test"
- private static final byte[] FAMILY = Bytes.toBytes("d");
- private static final byte[] QUAL = Bytes.toBytes("test");
- private final ExecutorService internalPool;//Thread pool
- private final int threads;//Thread pool size
- public MultiThreadedClientExample() throws IOException {
- //Runtime.getRuntime().availableProcessors() is the number of CPU cores of the current machine. Here, the number of CPU cores * 4
- this.threads = Runtime.getRuntime().availableProcessors() * 4;
- //Here, we call the thread factorybuilder of google's guava-12.0.0.1.jar. The default is Executors.defaultThreadFactory(), which creates the background thread factory class and normalizes the thread name
- ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).setNameFormat("internal-pol-%d").build();
- //Initialize thread pool
- this.internalPool = Executors.newFixedThreadPool(threads, threadFactory);
- }
- @Override
- public int run(String[] args) throws Exception {
- //The number of parameters can only be 2, the first is the table name, and the second is the number of operations
- if (args.length < 1 || args.length > 2) {
- System.out.println("Usage: " + this.getClass().getName() + " tableName [num_operations]");
- return -1;
- }
- final TableName tableName = TableName.valueOf(args[0]);//If a table name is passed in, the hbase table name passed in is used
- int numOperations = DEFAULT_NUM_OPERATIONS;
- if (args.length == 2) {
- numOperations = Integer.parseInt(args[1]);//If the number of operations passed in, the number of operations passed in is used
- }
- //The Fork/Join framework is a framework provided by Java 7 for parallel task execution. It is a framework that divides large tasks into several small tasks, and finally summarizes the results of each small task to get the results of large tasks.
- //Here, ForkJoinPool is successively introduced to org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation, org.apache.hadoop.hbase.client.HTable, org.apache.hadoop.hbase.client.AsyncProcess for use
- ExecutorService service = new ForkJoinPool(threads * 2);
- //Create a separate link for the write operation writeConnection
- final Connection writeConnection = ConnectionFactory.createConnection(getConf(), service);
- //Create a separate link for the read operation readConnection
- final Connection readConnection = ConnectionFactory.createConnection(getConf(), service);
- //The region information of tableName in hbase table is loaded into cache
- //Do not operate this operation when the number of region s exceeds 250000
- warmUpConnectionCache(readConnection, tableName);
- warmUpConnectionCache(writeConnection, tableName);
- List<Future<Boolean>> futures = new ArrayList<Future<Boolean>>(numOperations);
- for (int i = 0; i < numOperations; i++) {
- //Generating thread safe random floating point number r
- double r = ThreadLocalRandom.current().nextDouble();
- Future<Boolean> f;
- if (r < .30) {//30% batch write
- f = internalPool.submit(new WriteExampleCallable(writeConnection, tableName));
- } else if (r < .50) {//20% single writing
- f = internalPool.submit(new SingleWriteExampleCallable(writeConnection, tableName));
- } else {//50% for scans
- f = internalPool.submit(new ReadExampleCallable(writeConnection, tableName));
- }
- futures.add(f);
- }
- //Wait for each operation to complete, if not, wait for 10 minutes
- for (Future<Boolean> f : futures) {
- f.get(10, TimeUnit.MINUTES);
- }
- //Close thread pool internalPool and service
- internalPool.shutdownNow();
- service.shutdownNow();
- return 0;
- }
- //The region information of tableName in hbase table is loaded into cache
- //Do not operate this operation when the number of region s exceeds 250000
- private void warmUpConnectionCache(Connection connection, TableName tn) throws IOException {
- try (RegionLocator locator = connection.getRegionLocator(tn)) {
- LOG.info("Warmed up region location cache for " + tn + " got " + locator.getAllRegionLocations().size());
- }
- }
- /**
-  * Batch-write task (chosen for about 30% of the operations): writes one batch of puts with
-  * random row keys, all carrying the same random value.
-  */
- public static class WriteExampleCallable implements Callable<Boolean> {
-   /** Number of puts sent to the server in one batch. */
-   private static final int ROWS_PER_BATCH = 30;
-   private final Connection connection;
-   private final TableName tableName;
-
-   /**
-    * @param connection shared connection used to obtain the table; not closed by this task
-    * @param tableName table to write to
-    */
-   public WriteExampleCallable(Connection connection, TableName tableName) {
-     this.connection = connection;
-     this.tableName = tableName;
-   }
-
-   /**
-    * Builds the batch and submits it in a single call.
-    *
-    * @return always {@code true}; failures are reported as exceptions
-    * @throws Exception if obtaining the table or writing the batch fails
-    */
-   @Override
-   public Boolean call() throws Exception {
-     // try-with-resources (https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html)
-     // calls table.close() when the block exits; only the table is closed, not the connection.
-     try (Table t = connection.getTable(tableName)) {
-       byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));
-       // Collect the whole batch before sending it.
-       List<Put> puts = new ArrayList<>(ROWS_PER_BATCH);
-       for (int i = 0; i < ROWS_PER_BATCH; i++) {
-         byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
-         Put p = new Put(rk);
-         p.addImmutable(FAMILY, QUAL, value);
-         puts.add(p);
-       }
-       // Submit the batch to the HBase server.
-       t.put(puts);
-     }
-     return true;
-   }
- }
- /**
-  * Single-write task (chosen for about 20% of the operations): writes one put with a random
-  * row key and a random value.
-  */
- public static class SingleWriteExampleCallable implements Callable<Boolean> {
-   private final Connection connection;
-   private final TableName tableName;
-
-   /**
-    * @param connection shared connection used to obtain the table; not closed by this task
-    * @param tableName table to write to
-    */
-   public SingleWriteExampleCallable(Connection connection, TableName tableName) {
-     this.connection = connection;
-     this.tableName = tableName;
-   }
-
-   /**
-    * Writes a single row.
-    *
-    * @return always {@code true}; failures are reported as exceptions
-    * @throws Exception if obtaining the table or writing the row fails
-    */
-   @Override
-   public Boolean call() throws Exception {
-     // try-with-resources (https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html)
-     // closes the table on exit; the connection stays open for other tasks.
-     try (Table table = connection.getTable(tableName)) {
-       final String randomText = Double.toString(ThreadLocalRandom.current().nextDouble());
-       final byte[] cellValue = Bytes.toBytes(randomText);
-       final byte[] rowKey = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
-       final Put put = new Put(rowKey);
-       put.addImmutable(FAMILY, QUAL, cellValue);
-       table.put(put);
-     }
-     return true;
-   }
- }
- /**
-  * Scan task (chosen for about 50% of the operations): scans from a random row key and reads
-  * at most 100 rows.
-  */
- public static class ReadExampleCallable implements Callable<Boolean> {
-   private final Connection connection;
-   private final TableName tableName;
-
-   /**
-    * @param connection shared connection used to obtain the table; not closed by this task
-    * @param tableName table to scan
-    */
-   public ReadExampleCallable(Connection connection, TableName tableName) {
-     this.connection = connection;
-     this.tableName = tableName;
-   }
-
-   /**
-    * Runs the scan and sums the row-key lengths of the rows it returns.
-    *
-    * @return {@code true} when the summed row-key length is greater than zero
-    * @throws Exception if obtaining the table or scanning fails
-    */
-   @Override
-   public Boolean call() throws Exception {
-     // Upper bound on the rows read before the scan is considered done.
-     final int maxRows = 100;
-     // Total length in bytes of the row keys of all rows read.
-     int totalKeyBytes = 0;
-     int rowsRead = 0;
-     try (Table table = connection.getTable(tableName)) {
-       // Random row key at which the scan starts.
-       final byte[] startRow = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
-       final Scan scan = new Scan(startRow);
-       // Only the keys are needed here, so a KeyOnlyFilter is installed.
-       scan.setFilter(new KeyOnlyFilter());
-       // Fetch 20 rows per round trip.
-       scan.setCaching(20);
-       // Ask the server not to keep the blocks read by this scan in its block cache.
-       scan.setCacheBlocks(false);
-       // try-with-resources (https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html)
-       // calls ResultScanner.close() when the block exits.
-       try (ResultScanner scanner = table.getScanner(scan)) {
-         for (Result row : scanner) {
-           totalKeyBytes += row.getRow().length;
-           rowsRead++;
-           // Stop once 100 rows have been read.
-           if (rowsRead >= maxRows) {
-             break;
-           }
-         }
-       }
-     }
-     return totalKeyBytes > 0;
-   }
- }
- /**
-  * Entry point: hands a new MultiThreadedClientExample and the command-line arguments to
-  * ToolRunner, which invokes the example's run method with them.
-  *
-  * @param args {@code tableName [num_operations]}
-  * @throws Exception if the tool fails
-  */
- public static void main(String[] args) throws Exception {
-   final Tool example = new MultiThreadedClientExample();
-   ToolRunner.run(example, args);
- }
- }