Using application program (Java/Python) to access MaxCompute Lightning for data development

Posted by BobRoberts on Tue, 03 Dec 2019 00:51:50 +0100

MaxCompute Lightning It is an interactive query service of maxcompute products. It supports accessing the maxcompute project through PostgreSQL protocol and syntax connection. It enables you to use familiar tools to query and analyze the data in the maxcompute project with standard SQL, and quickly obtain the query results.
Many developers want to use the features of Lightning to develop data applications. This article will introduce how Java and Python can connect to Lightning for application development with examples (for reference, you need to replace the Endpoint and user authentication information of the region where your project is located).
I. Java uses JDBC to access Lightning
An example is as follows:

import java.sql.*;

public class Main {

    private static Connection connection;

    public static void main(String[] args) throws SQLException {

        String url = "jdbc:postgresql://lightning.cn-shanghai.maxcompute.aliyun.com:443/your_project_name?prepareThreshold=0&sslmode=require";
        String accessId = "<your_maxcompute_access_id>";
        String accessKey = "<your_maxcompute_access_key>";
        String sql = "select * from dual";

        try {
            Connection conn = getCon(url, accessId, accessKey);
            Statement st = conn.createStatement();
            System.out.println("Send Lightning query");
            ResultSet rs = st.executeQuery(sql);
            while (rs.next()) {
                System.out.println(rs.getString(1)+ "\t");
            }
            System.out.println("End Lightning query");
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    public static Connection getCon(String lightningsHost, String lightningUser, String lightningPwd) {
        try {
            if (connection == null || connection.isClosed()) {
                try {
                    Class.forName("org.postgresql.Driver").newInstance();
                    DriverManager.setLoginTimeout(1);
                    connection = DriverManager.getConnection(lightningsHost, lightningUser, lightningPwd);
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return connection;
    }
}

2. Java uses druid to access Lightning
1.pom dependence

<dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.0.23</version>
        </dependency>
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>9.3-1101-jdbc4</version>
        </dependency>

2.spring configuration

    <bean id="LightningDataSource" class="com.alibaba.druid.pool.DruidDataSource" init-method="init" destroy-method="close">
        < property name = "URL" value = "JDBC: postgresql://lightning.cn-shanghai.maxcompute.aliyun.com: 443 / project name? Preparethreshold = 0 & sslmode = require" / > <! -- replace with the Endpoint of the region where your project is located - >
        < property name = "username" value = "access key ID" / >
        < property name = "password" value = "access key secret" / >
        <property name="driverClassName" value="org.postgresql.Driver"/>
        <property name="dbType" value="postgresql"/>
        <property name="initialSize" value="1" />  
        <property name="minIdle" value="1" />
        < property name = "maxactive" value = "5" / > <! - the Lightning service has a limit of 20 connections per project, so do not configure it too large, or configure it on demand, otherwise it is easy to get query ﹣ wait ﹣ timeout error -- >
 
        <! -- the following two configurations check the validity of the connection and repair the occasional create connection holder error -- >
        <property name="testWhileIdle" value="true" />
        <property name="validationQuery" value="SELECT 1" />
    </bean>

  <bean class="com.xxx.xxx.LightningProvider">
    <property name="druidDataSource" ref="LightningDataSource"/>
  </bean>

3. Code access

public class LightningProvider {

    DruidDataSource druidDataSource;
    /**
     * Implementation of sql
     * @param sql
     * @return
     * @throws Exception
     */
    public void execute(String sql) throws SQLException {
        DruidPooledConnection connection = null ;
        Statement st = null;
        try{
            connection = druidDataSource.getConnection();
            st = connection.createStatement();

            ResultSet resultSet = st.executeQuery(sql);
            //Code for parsing and processing return value
            //Process by line, put the data of each line into a map
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            List<LinkedHashMap> rows = Lists.newArrayList();
            while(resultSet.next()){
            LinkedHashMap map = Maps.newLinkedHashMap();
            for(int i=1;i<=columnCount;i++){
                String label = resultSet.getMetaData().getColumnLabel(i);
                map.put(label,resultSet.getString(i));
            }
            rows.add(map);
        }   
        }catch (Exception e){
             e.printStackTrace();
        }finally {
            try {
                if(st!=null) {
                    st.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }

            try {
                if(connection!=null) {
                    connection.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

3. Python uses pyscopg2 to access Lightning
An example is as follows:

#!/usr/bin/env python
# coding=utf-8

import psycopg2
import sys

def query_lightning(lightning_conf, sql):
    """Query data through Lightning by sql

    Args:
        lightning_conf: a map contains settings of 'dbname', 'user', 'password', 'host', 'port'
        sql:  query submit to Lightning

    Returns:
        result: the query result in format of list of rows
    """
    result = None
    conn = None
    conn_str = None
    try:
        conn_str = ("dbname={dbname} "
                    "user={user} "
                    "password={password} "
                    "host={host} "
                    "port={port}").format(**lightning_conf)
    except Exception, e:
        print >> sys.stderr, ("Invalid Lightning' configuration "
                       "{}".format(e))
        sys.exit(1)

    try:
        conn = psycopg2.connect(conn_str)
        conn.set_session(autocommit=True) # This will disable transaction
                                   # started with keyword BEGIN,
                                   # which is currently not
                                   # supported by Lightning' public service

        cur = conn.cursor()
        # execute Lightning' query
        cur.execute(sql)
        # get result
        result = cur.fetchall()
    except Exception, e:
        print >> sys.stderr, ("Failed to query data through "
                       "Lightning: {}".format(e))
    finally:
        if conn:
            conn.close()

    return result

if __name__ == "__main__":
    # step1. setup configuration
    lightning_conf = {
        "dbname": "your_project_name",
        "user": "<your_maxcompute_access_id>", 
        "password": "<your_maxcompute_access_key>", 
        "host": "lightning.cn-shanghai.maxcompute.aliyun.com",  #your region lightning endpoint
        "port": 443
    }

    # step2. issue a query
    result = query_lightning(lightning_conf, "select * from test")
    # step3. print result
    if result:
        for i in xrange(0, len(result)):
            print "Got %d row from Lightning:%s" % (i + 1, result[i])

4. Python uses ODBC to access Lightning
You need to install and configure the odbc driver on your computer now. The code example is as follows:

import pyodbc
conn_str = (
    "DRIVER={PostgreSQL Unicode};"
    "DATABASE=your_project_name;"
    "UID=your_maxcompute_access_id;"
    "PWD=your_maxcompute_access_key;"
    "SERVER=lightning.cn-shanghai.maxcompute.aliyun.com;" #your region lightning endpoint
    "PORT=443;"
    )
conn = pyodbc.connect(conn_str)
crsr = conn.execute("SELECT * from test")
row = crsr.fetchone()
print(row)
crsr.close()
conn.close()

Because Lightning provides a PostgreSQL compatible interface, you can develop Lightning applications just like PostgreSQL applications.

Topics: Python SQL PostgreSQL Java