Source code interpretation - (3) HBase examples multithreadedclientexample

Posted by point86 on Thu, 02 Apr 2020 02:58:55 +0200

Address: http://aperise.iteye.com/blog/2372534
Source code interpretation - (1)hbase client source code http://aperise.iteye.com/blog/2372350
Source code interpretation - (2) HBase examples bufferedmutator example http://aperise.iteye.com/blog/2372505
Source code interpretation - (3) HBase examples multithreadedclientexample http://aperise.iteye.com/blog/2372534

1. Lightweight table, heavyweight connection

    MultiThreadedClientExample, another example shipped with hbase-examples, shows a further way of using the HBase client. In this example a Table is a lightweight object: it is created when a task starts and closed when that task finishes, while the Connection behind the Table is shared by all tasks and is not closed per task. A Connection is a heavyweight object that maintains the link to ZooKeeper, asynchronous operations and other state. From this example we can learn how to use the HBase client from multiple threads.

 

2.MultiThreadedClientExample

  1. import com.google.common.util.concurrent.ThreadFactoryBuilder;  
  2. import org.apache.commons.logging.Log;  
  3. import org.apache.commons.logging.LogFactory;  
  4. import org.apache.hadoop.conf.Configured;  
  5. import org.apache.hadoop.hbase.TableName;  
  6. import org.apache.hadoop.hbase.client.Connection;  
  7. import org.apache.hadoop.hbase.client.ConnectionFactory;  
  8. import org.apache.hadoop.hbase.client.Put;  
  9. import org.apache.hadoop.hbase.client.RegionLocator;  
  10. import org.apache.hadoop.hbase.client.Result;  
  11. import org.apache.hadoop.hbase.client.ResultScanner;  
  12. import org.apache.hadoop.hbase.client.Scan;  
  13. import org.apache.hadoop.hbase.client.Table;  
  14. import org.apache.hadoop.hbase.filter.KeyOnlyFilter;  
  15. import org.apache.hadoop.hbase.util.Bytes;  
  16. import org.apache.hadoop.util.Tool;  
  17. import org.apache.hadoop.util.ToolRunner;  
  18.   
  19. import java.io.IOException;  
  20. import java.util.ArrayList;  
  21. import java.util.List;  
  22. import java.util.concurrent.Callable;  
  23. import java.util.concurrent.ExecutorService;  
  24. import java.util.concurrent.Executors;  
  25. import java.util.concurrent.ForkJoinPool;  
  26. import java.util.concurrent.Future;  
  27. import java.util.concurrent.ThreadFactory;  
  28. import java.util.concurrent.ThreadLocalRandom;  
  29. import java.util.concurrent.TimeUnit;  
  30. /** 
  31.  * This example is used to show how to operate hbase client in multithreading 
  32.  * In this case, table is a lightweight object, which is destroyed when the thread is created and exited, while the connection after table is never closed 
  33.  * In this case, connection is a heavyweight object, which maintains the link, asynchronous operation and other states with zookeeper 
  34.  * In this example, 500000 requests are submitted to hbase server (30% of them are written in batch, 20% are written in single file, and 50% are used for scans) 
  35.  * 
  36.  */  
  37. public class MultiThreadedClientExample extends Configured implements Tool {  
  38.     private static final Log LOG = LogFactory.getLog(MultiThreadedClientExample.class);  
  39.     private static final int DEFAULT_NUM_OPERATIONS = 500000;  
  40.   
  41.     //The default test is hbase data table test column cluster d  
  42.     private static final byte[] FAMILY = Bytes.toBytes("d");  
  43.     private static final byte[] QUAL = Bytes.toBytes("test");  
  44.   
  45.     private final ExecutorService internalPool;//Thread pool  
  46.     private final int threads;//Thread pool size  
  47.   
  48.     public MultiThreadedClientExample() throws IOException {  
  49.         //Runtime.getRuntime().availableProcessors() is the number of CPU cores of the current machine. Here, the number of CPU cores * 4  
  50.         this.threads = Runtime.getRuntime().availableProcessors() * 4;  
  51.   
  52.         //Here, we call the thread factorybuilder of google's guava-12.0.0.1.jar. The default is Executors.defaultThreadFactory(), which creates the background thread factory class and normalizes the thread name  
  53.         ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).setNameFormat("internal-pol-%d").build();  
  54.         //Initialize thread pool  
  55.         this.internalPool = Executors.newFixedThreadPool(threads, threadFactory);  
  56.     }  
  57.   
  58.     @Override  
  59.     public int run(String[] args) throws Exception {  
  60.         //The number of parameters can only be 2, the first is the table name, and the second is the number of operations  
  61.         if (args.length < 1 || args.length > 2) {  
  62.             System.out.println("Usage: " + this.getClass().getName() + " tableName [num_operations]");  
  63.             return -1;  
  64.         }  
  65.   
  66.         final TableName tableName = TableName.valueOf(args[0]);//If a table name is passed in, the hbase table name passed in is used  
  67.         int numOperations = DEFAULT_NUM_OPERATIONS;  
  68.         if (args.length == 2) {  
  69.             numOperations = Integer.parseInt(args[1]);//If the number of operations passed in, the number of operations passed in is used  
  70.         }  
  71.   
  72.         //The Fork/Join framework is a framework provided by Java 7 for parallel task execution. It is a framework that divides large tasks into several small tasks, and finally summarizes the results of each small task to get the results of large tasks.  
  73.         //Here, ForkJoinPool is successively introduced to org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation, org.apache.hadoop.hbase.client.HTable, org.apache.hadoop.hbase.client.AsyncProcess for use  
  74.         ExecutorService service = new ForkJoinPool(threads * 2);  
  75.   
  76.         //Create a separate link for the write operation writeConnection  
  77.         final Connection writeConnection = ConnectionFactory.createConnection(getConf(), service);  
  78.         //Create a separate link for the read operation readConnection  
  79.         final Connection readConnection = ConnectionFactory.createConnection(getConf(), service);  
  80.   
  81.         //The region information of tableName in hbase table is loaded into cache  
  82.         //Do not operate this operation when the number of region s exceeds 250000  
  83.         warmUpConnectionCache(readConnection, tableName);  
  84.         warmUpConnectionCache(writeConnection, tableName);  
  85.   
  86.         List<Future<Boolean>> futures = new ArrayList<Future<Boolean>>(numOperations);  
  87.         for (int i = 0; i < numOperations; i++) {  
  88.             //Generating thread safe random floating point number r  
  89.             double r = ThreadLocalRandom.current().nextDouble();  
  90.             Future<Boolean> f;  
  91.               
  92.             if (r < .30) {//30% batch write  
  93.                 f = internalPool.submit(new WriteExampleCallable(writeConnection, tableName));  
  94.             } else if (r < .50) {//20% single writing  
  95.                 f = internalPool.submit(new SingleWriteExampleCallable(writeConnection, tableName));  
  96.             } else {//50% for scans  
  97.                 f = internalPool.submit(new ReadExampleCallable(writeConnection, tableName));  
  98.             }  
  99.             futures.add(f);  
  100.         }  
  101.   
  102.         //Wait for each operation to complete, if not, wait for 10 minutes  
  103.         for (Future<Boolean> f : futures) {  
  104.             f.get(10, TimeUnit.MINUTES);  
  105.         }  
  106.   
  107.         //Close thread pool internalPool and service  
  108.         internalPool.shutdownNow();  
  109.         service.shutdownNow();  
  110.         return 0;  
  111.     }  
  112.       
  113.     //The region information of tableName in hbase table is loaded into cache  
  114.     //Do not operate this operation when the number of region s exceeds 250000  
  115.     private void warmUpConnectionCache(Connection connection, TableName tn) throws IOException {  
  116.         try (RegionLocator locator = connection.getRegionLocator(tn)) {  
  117.             LOG.info("Warmed up region location cache for " + tn + " got " + locator.getAllRegionLocations().size());  
  118.         }  
  119.     }  
  120.   
  121.     /** 
  122.      * 30% Batch write task 
  123.      */  
  124.     public static class WriteExampleCallable implements Callable<Boolean> {  
  125.         private final Connection connection;  
  126.         private final TableName tableName;  
  127.   
  128.         public WriteExampleCallable(Connection connection, TableName tableName) {  
  129.             this.connection = connection;  
  130.             this.tableName = tableName;  
  131.         }  
  132.   
  133.         @Override  
  134.         public Boolean call() throws Exception {  
  135.             // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html  
  136.             //Here we take advantage of the new feature try in jdk1.7 (the object that must implement java.io.Closeable) {} catch (Exception e) {}  
  137.             //It is equivalent to calling the finally function, calling the close() method (the object that must implement java.io.Closeable), that is, calling table.close()  
  138.             try (Table t = connection.getTable(tableName)) {  
  139.                 byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));  
  140.                 int rows = 30;  
  141.   
  142.                 // Array to put the batch  
  143.                 ArrayList<Put> puts = new ArrayList<>(rows);  
  144.                 for (int i = 0; i < 30; i++) {  
  145.                     byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());  
  146.                     Put p = new Put(rk);  
  147.                     p.addImmutable(FAMILY, QUAL, value);  
  148.                     puts.add(p);  
  149.                 }  
  150.   
  151.                 //Batch submit to hbase server  
  152.                 t.put(puts);  
  153.             }  
  154.             return true;  
  155.         }  
  156.     }  
  157.   
  158.     /** 
  159.      * 20%Single write task 
  160.      */  
  161.     public static class SingleWriteExampleCallable implements Callable<Boolean> {  
  162.         private final Connection connection;  
  163.         private final TableName tableName;  
  164.   
  165.         public SingleWriteExampleCallable(Connection connection, TableName tableName) {  
  166.             this.connection = connection;  
  167.             this.tableName = tableName;  
  168.         }  
  169.   
  170.         @Override  
  171.         public Boolean call() throws Exception {  
  172.             // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html  
  173.             //Here we take advantage of the new feature try in jdk1.7 (the object that must implement java.io.Closeable) {} catch (Exception e) {}  
  174.             //It is equivalent to calling the finally function, calling the close() method (the object that must implement java.io.Closeable), that is, calling table.close()  
  175.             try (Table t = connection.getTable(tableName)) {  
  176.                 byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));  
  177.                 byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());  
  178.                 Put p = new Put(rk);  
  179.                 p.addImmutable(FAMILY, QUAL, value);  
  180.                 t.put(p);  
  181.             }  
  182.             return true;  
  183.         }  
  184.     }  
  185.   
  186.     /** 
  187.      * 50%For scans 
  188.      */  
  189.     public static class ReadExampleCallable implements Callable<Boolean> {  
  190.         private final Connection connection;  
  191.         private final TableName tableName;  
  192.   
  193.         public ReadExampleCallable(Connection connection, TableName tableName) {  
  194.             this.connection = connection;  
  195.             this.tableName = tableName;  
  196.         }  
  197.   
  198.         @Override  
  199.         public Boolean call() throws Exception {  
  200.             // total length in bytes of all read rows.  
  201.             int result = 0;  
  202.   
  203.             // Number of rows the scan will read before being considered done.  
  204.             int toRead = 100;  
  205.             try (Table t = connection.getTable(tableName)) {  
  206.                 //Starting value of rowkey to be found  
  207.                 byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());  
  208.                 Scan s = new Scan(rk);  
  209.   
  210.                 //Set the filter of scan to KeyOnlyFilter, which means that only rowkey will be compared during scan comparison  
  211.                 s.setFilter(new KeyOnlyFilter());  
  212.   
  213.                 //Take only 20 pieces of data at a time  
  214.                 s.setCaching(20);  
  215.   
  216.                 //Setting hbase does not apply to caching. Caching is to place the previous data in the block cache of hbase server in order to get these data faster next time  
  217.                 s.setCacheBlocks(false);  
  218.   
  219.                 // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html  
  220.                 //Here we take advantage of the new feature try in jdk1.7 (the object that must implement java.io.Closeable) {} catch (Exception e) {}  
  221.                 //Equivalent to calling the finally function, calling the close() method of (the object that must implement java.io.Closeable), that is, calling ResultScanner.close()  
  222.                 try (ResultScanner rs = t.getScanner(s)) {  
  223.                     //Traversal of hbase rows  
  224.                     for (Result r : rs) {  
  225.                         result += r.getRow().length;  
  226.                         toRead -= 1;  
  227.   
  228.                         //Take only 100 pieces of data and exit when it reaches 100 pieces  
  229.                         if (toRead <= 0) {  
  230.                             break;  
  231.                         }  
  232.                     }  
  233.                 }  
  234.             }  
  235.             return result > 0;  
  236.         }  
  237.     }  
  238.   
  239.     public static void main(String[] args) throws Exception {  
  240.         //Call the Tool class ToolRunner to execute the run method of MultiThreadedClientExample, which implements the object MultiThreadedClientExample of the interface Tool. At the same time, String [] args will be passed into the run method of MultiThreadedClientExample  
  241.         ToolRunner.run(new MultiThreadedClientExample(), args);  
  242.     }  
  243. }  

Topics: HBase Apache Java Hadoop