Source code interpretation - (3) HBase examples multithreadedclientexample

Posted by point86 on Thu, 02 Apr 2020 02:58:55 +0200

Address: http://aperise.iteye.com/blog/2372534

Source code interpretation - (1)hbase client source code	http://aperise.iteye.com/blog/2372350
Source code interpretation - (2) HBase examples bufferedmutator example	http://aperise.iteye.com/blog/2372505
Source code interpretation - (3) HBase examples multithreadedclientexample	http://aperise.iteye.com/blog/2372534

1. Lightweight table, heavyweight connection

MultiThreadedClientExample, another sample shipped with hbase-examples, shows a second way of using the HBase client. In this example a Table is a lightweight object: each task creates one when it starts and closes it when it finishes, while the Connection behind the tables is shared by all tasks and stays open while they run. A Connection is a heavyweight object that maintains the link to ZooKeeper, asynchronous operations and other state. From this sample we can learn how to use the HBase client from multiple threads.

2.MultiThreadedClientExample

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.Connection;

import org.apache.hadoop.hbase.client.ConnectionFactory;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.client.RegionLocator;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.ResultScanner;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.client.Table;

import org.apache.hadoop.hbase.filter.KeyOnlyFilter;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;



import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import java.util.concurrent.Callable;

import java.util.concurrent.ExecutorService;

import java.util.concurrent.Executors;

import java.util.concurrent.ForkJoinPool;

import java.util.concurrent.Future;

import java.util.concurrent.ThreadFactory;

import java.util.concurrent.ThreadLocalRandom;

import java.util.concurrent.TimeUnit;

/**
 * This example shows how to use the HBase client from multiple threads.
 *
 * In this example a Table is a lightweight object: each task creates one when it starts and closes it when it finishes, while the Connection behind the tables is shared by all tasks and stays open while they run.
 *
 * A Connection is a heavyweight object that maintains the link to ZooKeeper, asynchronous operations and other state.
 *
 * By default the example submits 500000 requests to the HBase server (roughly 30% batch writes, 20% single writes and 50% scans).
 */

public class MultiThreadedClientExample extends Configured implements Tool {

    private static final Log LOG = LogFactory.getLog(MultiThreadedClientExample.class);

    private static final int DEFAULT_NUM_OPERATIONS = 500000;



    //Every write in this example goes to column family "d", qualifier "test"

    private static final byte[] FAMILY = Bytes.toBytes("d");

    private static final byte[] QUAL = Bytes.toBytes("test");



    private final ExecutorService internalPool;//Thread pool

    private final int threads;//Thread pool size



    /**
     * Creates the example together with its internal worker pool.
     *
     * <p>The pool has a fixed size of four threads per processor reported by the JVM, and its
     * threads are daemon threads named {@code internal-pol-N}.
     *
     * @throws IOException declared by the signature; nothing in this body throws it
     */
    public MultiThreadedClientExample() throws IOException {
        // Pool size: four workers for every processor the runtime reports.
        final int processors = Runtime.getRuntime().availableProcessors();
        this.threads = processors * 4;

        // Guava's ThreadFactoryBuilder produces daemon threads with a numbered name.
        final ThreadFactory workerFactory = new ThreadFactoryBuilder()
                .setNameFormat("internal-pol-%d")
                .setDaemon(true)
                .build();

        // Fixed-size pool that executes the submitted read/write tasks.
        this.internalPool = Executors.newFixedThreadPool(this.threads, workerFactory);
    }



    /**
     * Submits a mix of batch writes, single writes and scans against one table and waits for
     * all of them to finish.
     *
     * <p>Two connections are created on top of a shared {@link ForkJoinPool}: one used by the
     * write tasks and one used by the scan tasks. Both connections are closed, and both thread
     * pools are shut down, before this method returns or throws.
     *
     * @param args {@code tableName [num_operations]}; the operation count defaults to
     *             {@code DEFAULT_NUM_OPERATIONS} when omitted
     * @return 0 when every operation completed, -1 when the argument count is wrong
     * @throws Exception if a connection cannot be created, the operation count is not a valid
     *                   integer, or any task fails or does not finish within 10 minutes
     */
    @Override
    public int run(String[] args) throws Exception {
        // One or two arguments are accepted: the table name, optionally followed by the number of operations.
        if (args.length < 1 || args.length > 2) {
            System.out.println("Usage: " + this.getClass().getName() + " tableName [num_operations]");
            return -1;
        }

        final TableName tableName = TableName.valueOf(args[0]);
        int numOperations = DEFAULT_NUM_OPERATIONS;
        if (args.length == 2) {
            numOperations = Integer.parseInt(args[1]);
        }

        // Executor handed to both connections; sized at twice the internal pool.
        ExecutorService service = new ForkJoinPool(threads * 2);

        // try-with-resources closes both connections before the finally block shuts the pools
        // down, so the connections are released while their executor is still alive.
        try (Connection writeConnection = ConnectionFactory.createConnection(getConf(), service);
             Connection readConnection = ConnectionFactory.createConnection(getConf(), service)) {

            // Load the table's region locations into each connection's cache up front.
            // Per the original author, do not do this for tables with more than 250000 regions.
            warmUpConnectionCache(readConnection, tableName);
            warmUpConnectionCache(writeConnection, tableName);

            List<Future<Boolean>> futures = new ArrayList<>(numOperations);
            for (int i = 0; i < numOperations; i++) {
                double r = ThreadLocalRandom.current().nextDouble();
                Future<Boolean> f;

                if (r < .30) {
                    // 30%: batch write
                    f = internalPool.submit(new WriteExampleCallable(writeConnection, tableName));
                } else if (r < .50) {
                    // 20%: single write
                    f = internalPool.submit(new SingleWriteExampleCallable(writeConnection, tableName));
                } else {
                    // 50%: scan, issued on the dedicated read connection
                    f = internalPool.submit(new ReadExampleCallable(readConnection, tableName));
                }
                futures.add(f);
            }

            // Wait for every operation, allowing each one up to 10 minutes.
            for (Future<Boolean> f : futures) {
                f.get(10, TimeUnit.MINUTES);
            }
        } finally {
            // Runs on success and on failure, so worker threads never outlive the run.
            internalPool.shutdownNow();
            service.shutdownNow();
        }
        return 0;
    }



    /**
     * Fetches every region location of {@code tn} through {@code connection} and logs how many
     * were returned. The locator is closed before the method returns.
     *
     * <p>Per the original author's note, this should not be done for tables with more than
     * 250000 regions.
     *
     * @param connection connection whose region locator is queried
     * @param tn         table whose region locations are fetched
     * @throws IOException if obtaining the locator or the region locations fails
     */
    private void warmUpConnectionCache(Connection connection, TableName tn) throws IOException {
        try (RegionLocator regionLocator = connection.getRegionLocator(tn)) {
            final int regionCount = regionLocator.getAllRegionLocations().size();
            LOG.info("Warmed up region location cache for " + tn + " got " + regionCount);
        }
    }



    /**
     * Batch write task (about 30% of the submitted operations): writes 30 puts with random row
     * keys in a single call.
     */
    public static class WriteExampleCallable implements Callable<Boolean> {
        private final Connection connection;
        private final TableName tableName;

        /**
         * @param connection connection from which the task obtains its table
         * @param tableName  table to write to
         */
        public WriteExampleCallable(Connection connection, TableName tableName) {
            this.tableName = tableName;
            this.connection = connection;
        }

        /**
         * Opens a table, sends one batch of puts and closes the table again.
         *
         * @return always {@code true} when the batch was sent without an exception
         * @throws Exception if obtaining the table or writing the batch fails
         */
        @Override
        public Boolean call() throws Exception {
            // try-with-resources closes the table when the block exits, see
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
            try (Table table = connection.getTable(tableName)) {
                // Every put in the batch carries the same random value.
                final byte[] cellValue =
                        Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));
                final int batchSize = 30;

                // Fill the batch, giving each put its own random row key.
                final List<Put> batch = new ArrayList<>(batchSize);
                while (batch.size() < batchSize) {
                    final Put put = new Put(Bytes.toBytes(ThreadLocalRandom.current().nextLong()));
                    put.addImmutable(FAMILY, QUAL, cellValue);
                    batch.add(put);
                }

                // Send the whole batch to the HBase server in one call.
                table.put(batch);
            }
            return Boolean.TRUE;
        }
    }



    /**
     * Single write task (about 20% of the submitted operations): writes one put with a random
     * row key.
     */
    public static class SingleWriteExampleCallable implements Callable<Boolean> {
        private final Connection connection;
        private final TableName tableName;

        /**
         * @param connection connection from which the task obtains its table
         * @param tableName  table to write to
         */
        public SingleWriteExampleCallable(Connection connection, TableName tableName) {
            this.tableName = tableName;
            this.connection = connection;
        }

        /**
         * Opens a table, writes a single put and closes the table again.
         *
         * @return always {@code true} when the put was sent without an exception
         * @throws Exception if obtaining the table or writing the put fails
         */
        @Override
        public Boolean call() throws Exception {
            // try-with-resources closes the table when the block exits, see
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
            try (Table table = connection.getTable(tableName)) {
                final String randomText = Double.toString(ThreadLocalRandom.current().nextDouble());
                final byte[] cellValue = Bytes.toBytes(randomText);
                final byte[] rowKey = Bytes.toBytes(ThreadLocalRandom.current().nextLong());

                final Put put = new Put(rowKey);
                put.addImmutable(FAMILY, QUAL, cellValue);
                table.put(put);
            }
            return Boolean.TRUE;
        }
    }



    /**
     * Scan task (about 50% of the submitted operations): scans up to 100 rows starting at a
     * random row key.
     */
    public static class ReadExampleCallable implements Callable<Boolean> {
        private final Connection connection;
        private final TableName tableName;

        /**
         * @param connection connection from which the task obtains its table
         * @param tableName  table to scan
         */
        public ReadExampleCallable(Connection connection, TableName tableName) {
            this.tableName = tableName;
            this.connection = connection;
        }

        /**
         * Scans forward from a random start key and sums the row-key lengths of the rows read.
         *
         * @return {@code true} if the summed row-key length is greater than zero,
         *         {@code false} otherwise (for example when the scan returned no rows)
         * @throws Exception if obtaining the table or scanning fails
         */
        @Override
        public Boolean call() throws Exception {
            // Upper bound on the number of rows consumed from the scanner.
            final int maxRows = 100;

            // Running total of the row-key lengths, in bytes, of the rows read.
            int totalKeyBytes = 0;

            try (Table table = connection.getTable(tableName)) {
                // The scan begins at a random row key.
                final byte[] startKey = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
                final Scan scan = new Scan(startKey);

                // Only row keys are inspected below, so a KeyOnlyFilter is applied.
                scan.setFilter(new KeyOnlyFilter());

                // Scanner caching of 20 rows.
                scan.setCaching(20);

                // Do not have the server cache the blocks read by this scan.
                scan.setCacheBlocks(false);

                // try-with-resources closes the scanner when the block exits, see
                // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
                try (ResultScanner scanner = table.getScanner(scan)) {
                    int rowsRead = 0;
                    for (Result row : scanner) {
                        totalKeyBytes += row.getRow().length;
                        rowsRead++;

                        // Stop as soon as the row limit has been reached.
                        if (rowsRead >= maxRows) {
                            break;
                        }
                    }
                }
            }
            return totalKeyBytes > 0;
        }
    }



    /**
     * Command-line entry point.
     *
     * <p>Hands a new {@code MultiThreadedClientExample} and {@code args} to {@link ToolRunner},
     * which invokes {@code run}. A non-zero result (for example the -1 returned for a wrong
     * argument count) becomes the process exit status instead of being discarded.
     *
     * @param args {@code tableName [num_operations]}
     * @throws Exception if constructing or running the example fails
     */
    public static void main(String[] args) throws Exception {
        final int exitCode = ToolRunner.run(new MultiThreadedClientExample(), args);

        // Exit explicitly only on failure; a successful run returns normally.
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }

}

Topics: HBase Apache Java Hadoop

Programmer Think

Source code interpretation - (3) HBase examples multithreadedclientexample

1. Lightweight table, heavyweight connection

2.MultiThreadedClientExample

Hot Topics