Implement your own Protobuf Any

Posted by Sikk Industries on Fri, 01 Nov 2019 07:03:05 +0100

Preface

In some cases google.protobuf.Any is not very convenient, and I wished it had a friendlier design. From the protobuf source code it is easy to see that google.protobuf.Any is itself just a proto message class, so it can be completely replaced by a proto class you define yourself.

Protobuf's Any: google.protobuf.Any

google.protobuf.Any is itself defined in a proto file

With all the comments removed, google/protobuf/any.proto contains only the following, so it is easy to define a customized equivalent.

syntax = "proto3";

package google.protobuf;

option csharp_namespace = "Google.Protobuf.WellKnownTypes";
option go_package = "github.com/golang/protobuf/ptypes/any";
option java_package = "com.google.protobuf";
option java_outer_classname = "AnyProto";
option java_multiple_files = true;
option objc_class_prefix = "GPB";

// Generic container message with two fields: a type URL and a byte payload.
message Any {
    // Type URL string that names the type of the payload stored in `value`.
    string type_url = 1;
    // Raw bytes of the payload.
    bytes value = 2;
}

Compiling any.proto produces a Message class, to which protobuf adds some helper methods specific to Any. The source of the class compiled from any.proto, shown below, reveals how Any.java differs from other Message classes.

google.protobuf.Any itself is also a GeneratedMessageV3

In short, the source code of Any is not long. With the GeneratedMessageV3 and Builder related code removed, roughly the following remains:

/**
 * Abridged excerpt of the generated {@code Any} message class: the two fields plus the
 * pack / is / unpack helpers. The trailing {@code ...} marks code omitted from the excerpt
 * (for example {@code getTypeNameFromTypeUrl}, which is called below but not shown).
 */
public  final class Any 
    extends GeneratedMessageV3 implements AnyOrBuilder {

    // typeUrl_ is declared as Object; per the original comment it will hold a
    // java.lang.String value.
    private volatile Object typeUrl_;
    private ByteString value_;
    
    /**
     * Joins a type URL prefix and the descriptor's full name with exactly one "/" between
     * them: the separator is added only when the prefix does not already end with "/".
     */
    private static String getTypeUrl(String typeUrlPrefix, Descriptors.Descriptor descriptor) {
        return typeUrlPrefix.endsWith("/")
            ? typeUrlPrefix + descriptor.getFullName()
            : typeUrlPrefix + "/" + descriptor.getFullName();
    }

    /**
     * Packs a message into an Any using the prefix "type.googleapis.com" for the type URL
     * and {@code message.toByteString()} for the value.
     */
    public static <T extends com.google.protobuf.Message> Any pack(T message) {
        return Any.newBuilder()
            .setTypeUrl(getTypeUrl("type.googleapis.com",
                                message.getDescriptorForType()))
            .setValue(message.toByteString())
            .build();
    }

    /**
     * Packs a message into an Any like {@link #pack(Message)}, but with a caller-supplied
     * type URL prefix.
     */
    public static <T extends Message> Any pack(T message, String typeUrlPrefix) {
        return Any.newBuilder()
            .setTypeUrl(getTypeUrl(typeUrlPrefix,
                                message.getDescriptorForType()))
            .setValue(message.toByteString())
            .build();
    }

    /**
     * Returns whether the type name taken from this Any's type URL equals the descriptor
     * full name of the given message class.
     */
    public <T extends Message> boolean is(Class<T> clazz) {
        T defaultInstance = com.google.protobuf.Internal.getDefaultInstance(clazz);
            return getTypeNameFromTypeUrl(getTypeUrl()).equals(
                defaultInstance.getDescriptorForType().getFullName());
    }

    // Result of the first successful unpack(); returned by later calls instead of re-parsing.
    private volatile Message cachedUnpackValue;

    /**
     * Parses the stored value as an instance of {@code clazz}.
     *
     * @throws InvalidProtocolBufferException if {@link #is(Class)} rejects {@code clazz},
     *         or if thrown by the parser
     */
    @java.lang.SuppressWarnings("unchecked")
    public <T extends Message> T unpack(Class<T> clazz) throws InvalidProtocolBufferException {
        if (!is(clazz)) {
            throw new InvalidProtocolBufferException("Type of the Any message does not match the given class.");
        }
        // Reuse the previously parsed message when there is one.
        if (cachedUnpackValue != null) {
            return (T) cachedUnpackValue;
        }
        T defaultInstance = com.google.protobuf.Internal.getDefaultInstance(clazz);
        T result = (T) defaultInstance.getParserForType().parseFrom(getValue());
        cachedUnpackValue = result;
        return result;
    }
    ...
}

Any has two fields: typeUrl and value.

typeUrl stores the type description of the packed Message class: a URL prefix followed by the message's full name, which includes the package declared in the original proto file. For example, the typeUrl of Any itself is type.googleapis.com/google.protobuf.Any. value is the ByteString of the Message object stored in the Any object, obtained by calling the method toByteString(). Once you know this, you can build your own version.

Custom AnyData

common/any_data.proto

syntax = "proto3";

package donespeak.protobuf;

option java_package = "io.gitlab.donespeak.proto.common";
option java_outer_classname = "AnyDataProto";

// https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/any.proto
// Custom replacement for google.protobuf.Any with the same two fields.
message AnyData {
    // Type URL of the packed message, of the form <prefix>/<package>.<MessageName>,
    // such as api.donespeak.cn/data.proto.DataTypeProto
    string type_url = 1;
    // Serialized packed message, i.e. the result of message.toByteString()
    bytes value = 2;
}

Encoding and parsing of AnyData

The custom AnyData is just an ordinary Message class, so a separate utility class is needed to implement pack and unpack.

package io.gitlab.donespeak.javatool.toolprotobuf.anydata;

import com.google.protobuf.Descriptors;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Message;
import io.gitlab.donespeak.proto.common.AnyDataProto;

/**
 * Packs protobuf messages into {@link AnyDataProto.AnyData} and unpacks them again.
 *
 * <p>The static {@code pack} methods create an {@code AnyData}; an instance wraps one
 * {@code AnyData} and caches the first successfully unpacked message so repeated
 * {@link #unpack(Class)} calls do not re-parse the payload.
 */
public class AnyDataPacker {
    private static final String COMPANY_TYPE_URL_PREFIX = "type.donespeakapi.cn";

    private final AnyDataProto.AnyData anyData;

    // Result of the first successful unpack(); volatile so it is visible across threads.
    private volatile Message cachedUnpackValue;

    /**
     * @param anyData the packed value to inspect and unpack; must not be null
     * @throws NullPointerException if {@code anyData} is null
     */
    public AnyDataPacker(AnyDataProto.AnyData anyData) {
        // Fail at construction time rather than later inside is()/unpack().
        this.anyData = java.util.Objects.requireNonNull(anyData, "anyData");
    }

    /**
     * Packs {@code message} using the default type URL prefix.
     *
     * @param message the message to pack
     * @return an AnyData whose type URL is {@code <default prefix>/<message full name>} and
     *         whose value is {@code message.toByteString()}
     */
    public static <T extends com.google.protobuf.Message> AnyDataProto.AnyData pack(T message) {
        return pack(message, COMPANY_TYPE_URL_PREFIX);
    }

    /**
     * Packs {@code message} using a caller-supplied type URL prefix.
     *
     * @param message       the message to pack
     * @param typeUrlPrefix prefix placed before the message's full name; a "/" separator is
     *                      added unless the prefix already ends with one
     * @return an AnyData carrying the type URL and {@code message.toByteString()}
     */
    public static <T extends Message> AnyDataProto.AnyData pack(T message, String typeUrlPrefix) {
        String typeUrl = getTypeUrl(typeUrlPrefix, message.getDescriptorForType());

        return AnyDataProto.AnyData.newBuilder()
            .setTypeUrl(typeUrl)
            .setValue(message.toByteString())
            .build();
    }

    /**
     * Returns whether the wrapped AnyData holds a message of the given class, by comparing
     * the type name after the last "/" of the type URL with the class's descriptor full name.
     */
    public <T extends Message> boolean is(Class<T> clazz) {
        T defaultInstance = com.google.protobuf.Internal.getDefaultInstance(clazz);
        return getTypeNameFromTypeUrl(anyData.getTypeUrl()).equals(
            defaultInstance.getDescriptorForType().getFullName());
    }

    /** Returns the part of {@code typeUrl} after its last "/", or "" when it contains none. */
    private static String getTypeNameFromTypeUrl(String typeUrl) {
        int pos = typeUrl.lastIndexOf('/');
        return pos == -1 ? "" : typeUrl.substring(pos + 1);
    }

    /**
     * Parses the wrapped value as an instance of {@code clazz}.
     *
     * @param clazz the expected message class
     * @return the parsed message; the same instance is returned by later calls
     * @throws InvalidProtocolBufferException if the type URL does not match {@code clazz},
     *         or if thrown by the parser
     */
    @SuppressWarnings("unchecked") // guarded by is(clazz): the type name matches clazz
    public <T extends Message> T unpack(Class<T> clazz) throws InvalidProtocolBufferException {
        if (!is(clazz)) {
            throw new InvalidProtocolBufferException("Type of the Any message does not match the given class.");
        }
        // Read the volatile field once so the null check and the return see the same value.
        Message cached = cachedUnpackValue;
        if (cached != null) {
            return (T) cached;
        }
        T defaultInstance = com.google.protobuf.Internal.getDefaultInstance(clazz);
        T result = (T) defaultInstance.getParserForType().parseFrom(anyData.getValue());
        cachedUnpackValue = result;
        return result;
    }

    /** Builds the type URL for {@code descriptor} with the default prefix. */
    private static String getTypeUrl(final Descriptors.Descriptor descriptor) {
        return getTypeUrl(COMPANY_TYPE_URL_PREFIX, descriptor);
    }

    /** Joins prefix and descriptor full name with exactly one "/" between them. */
    private static String getTypeUrl(String typeUrlPrefix, Descriptors.Descriptor descriptor) {
        return typeUrlPrefix.endsWith("/")
            ? typeUrlPrefix + descriptor.getFullName()
            : typeUrlPrefix + "/" + descriptor.getFullName();
    }
}

It's easy to see that this class is basically the same as the implementation in google.protobuf.Any. Indeed, it was extracted almost directly from the Any class. You could also make unpack a static method, which would turn this into a purely static utility class. The original instance-based design is kept here so that the result can be cached when unpacking. Because Message objects are immutable, this strategy works well when the same value is unpacked multiple times.

Define a lookup utility Class that maps typeUrl and Class

Following the previous description, a separate unpacking tool is provided here to offer more ways of unpacking. The utility class has a static unpack method, which can be called directly without creating an instance. The other unpack method relies on the MessageTypeLookup class, a registry that holds the mapping between the Descriptor of a Message and its Class. With this registry, all possible Message classes can be registered up front and then unpacked in a generic way, without the caller having to work out which Class corresponds to the data in AnyData.value.

MessageTypeUnpacker.java

package io.gitlab.donespeak.javatool.toolprotobuf.anydata;

import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Message;
import io.gitlab.donespeak.proto.common.AnyDataProto;

/**
 * Unpacks {@link AnyDataProto.AnyData} values, either against an explicitly given message
 * class (static method) or by resolving the class from the type URL through a
 * {@link MessageTypeLookup} (instance method).
 */
public class MessageTypeUnpacker {
    private final MessageTypeLookup messageTypeLookup;

    /**
     * @param messageTypeLookup registry used to resolve a type URL to a message class
     */
    public MessageTypeUnpacker(MessageTypeLookup messageTypeLookup) {
        this.messageTypeLookup = messageTypeLookup;
    }

    /**
     * Unpacks {@code anyData} into the message class registered for its type URL.
     *
     * @param anyData the packed value
     * @return the unpacked message
     * @throws InvalidProtocolBufferException if no class is registered for the type URL,
     *         or if unpacking itself fails
     */
    public Message unpack(AnyDataProto.AnyData anyData) throws InvalidProtocolBufferException {
        Class<? extends Message> messageClass = messageTypeLookup.lookup(anyData.getTypeUrl());
        if (messageClass == null) {
            // The lookup returns null for unknown types; report that explicitly instead of
            // letting a null class reach AnyDataPacker.unpack.
            throw new InvalidProtocolBufferException(
                "No message class registered for type URL: " + anyData.getTypeUrl());
        }
        return new AnyDataPacker(anyData).unpack(messageClass);
    }

    /**
     * Unpacks {@code anyData} as an instance of {@code messageClass}.
     *
     * @param anyData      the packed value
     * @param messageClass the expected message class
     * @return the unpacked message
     * @throws InvalidProtocolBufferException if the type URL does not match
     *         {@code messageClass}, or if thrown by the parser
     */
    public static <T extends Message> T unpack(AnyDataProto.AnyData anyData, Class<T> messageClass)
        throws InvalidProtocolBufferException {
        AnyDataPacker anyDataPacker = new AnyDataPacker(anyData);
        return anyDataPacker.unpack(messageClass);
    }
}

MessageTypeLookup registers the mapping between a typeUrl and the Class of a Message, so that the corresponding Class can be found from the typeUrl.

MessageTypeLookup.java

package io.gitlab.donespeak.javatool.toolprotobuf.anydata;

import com.google.protobuf.Descriptors;
import com.google.protobuf.Message;

import java.util.HashMap;
import java.util.Map;

/**
 * Registry mapping a message's full type name to its generated {@link Message} class.
 * Instances are created through {@link Builder}.
 */
public class MessageTypeLookup {

    private final Map<String, Class<? extends Message>> typeMessageClassMap;

    private MessageTypeLookup(Map<String, Class<? extends Message>> typeMessageClassMap) {
        // Copy the map so that further calls on the builder cannot change a built lookup.
        this.typeMessageClassMap = new HashMap<>(typeMessageClassMap);
    }

    /**
     * Finds the message class registered for a type URL or bare type name.
     *
     * @param typeUrl either a full type URL ({@code prefix/full.type.Name}) or just the
     *                full type name; only the last non-empty "/"-separated segment is used
     * @return the registered class, or {@code null} when {@code typeUrl} is null or no
     *         class is registered for it
     */
    public Class<? extends Message> lookup(final String typeUrl) {
        if (typeUrl == null) {
            return null;
        }
        return typeMessageClassMap.get(getTypeUrlSuffix(typeUrl));
    }

    /** Creates an empty {@link Builder}. */
    public static Builder newBuilder() {
        return new Builder();
    }

    /**
     * Returns the last non-empty "/"-separated segment of {@code fullTypeUrl}, ignoring
     * trailing slashes; returns "" when the input consists only of slashes (or is empty).
     * A string without any "/" is returned unchanged.
     */
    private static String getTypeUrlSuffix(String fullTypeUrl) {
        // Avoids String.split, which yields an empty array for inputs such as "/" and made
        // the previous parts[parts.length - 1] access throw.
        int end = fullTypeUrl.length();
        while (end > 0 && fullTypeUrl.charAt(end - 1) == '/') {
            end--;
        }
        int start = fullTypeUrl.lastIndexOf('/', end - 1) + 1;
        return fullTypeUrl.substring(start, end);
    }

    /** Collects type-name-to-class mappings and builds a {@link MessageTypeLookup}. */
    public static class Builder {

        private final Map<String, Class<? extends Message>> mappings;

        public Builder() {
            mappings = new HashMap<>();
        }

        /**
         * Registers {@code messageClass} under the full name of {@code descriptor},
         * replacing any class previously registered under that name.
         *
         * @return this builder
         */
        public Builder addMessageTypeMapping(final Descriptors.Descriptor descriptor,
            final Class<? extends Message> messageClass) {
            mappings.put(descriptor.getFullName(), messageClass);
            return this;
        }

        /** Builds a lookup holding a snapshot of the mappings registered so far. */
        public MessageTypeLookup build() {
            return new MessageTypeLookup(mappings);
        }
    }
}

With MessageTypeLookup, all possible messages can be registered in advance and then unpacked through it, giving a general implementation of AnyData packing and unpacking. However, registration is very tedious: every message has to be added by hand, which is laborious and error-prone, and the work has to be repeated each time a new message class is added.

Find the class and its inner class under the specified path

To solve this problem with MessageTypeLookup, you can add a way to find the qualifying classes by package path. In practice, all generated proto classes are usually placed under a common package name, so you only need to know that package name, scan all classes under it for subclasses of GeneratedMessageV3, and register the results in MessageTypeLookup. With this in place, a newly added Message class is registered automatically and never needs to be added to MessageTypeLookup by hand.

Find all classes under a package

To find all classes under a package, the Reflections library provides many useful reflection utilities. Implementing such scanning yourself is really troublesome and full of pitfalls. It would be interesting to cover reflection and class loading in more detail when time permits.

This part is inspired by Spring's @ComponentScan annotation. Similarly, two scanning methods are provided here: one takes package name prefixes, and the other takes classes whose packages are used as the packages to scan. Both accept multiple values.

<!-- https://mvnrepository.com/artifact/org.reflections/reflections -->
<dependency>
    <groupId>org.reflections</groupId>
    <artifactId>reflections</artifactId>
    <version>0.9.11</version>
</dependency>

ClassScanner.java

package io.gitlab.donespeak.javatool.toolprotobuf.anydata;

import java.util.Set;
import com.google.protobuf.GeneratedMessageV3;
import org.reflections.Reflections;

/**
 * Helper for locating subtypes of a class inside a set of packages, backed by the
 * Reflections library.
 */
public class ClassScanner {

    /**
     * Scans the given packages for subtypes of {@code subType}.
     *
     * @param subType      the parent type whose subtypes are wanted
     * @param basePackages package names handed to Reflections as the scan roots
     * @return the subtypes reported by Reflections
     */
    public static <T> Set<Class<? extends T>> lookupClasses(Class<T> subType, String... basePackages) {
        final Reflections scanner = new Reflections(basePackages);
        final Set<Class<? extends T>> matches = scanner.getSubTypesOf(subType);
        return matches;
    }

    /**
     * Scans the packages that contain the given classes for subtypes of {@code subType}.
     *
     * @param subType            the parent type whose subtypes are wanted
     * @param basePackageClasses classes whose packages are used as the scan roots
     * @return the subtypes reported by Reflections
     */
    public static <T> Set<Class<? extends T>> lookupClasses(Class<T> subType, Class<?>... basePackageClasses) {
        final String[] packageNames = new String[basePackageClasses.length];
        int slot = 0;
        for (Class<?> anchor : basePackageClasses) {
            // Each anchor class contributes the name of the package it lives in.
            packageNames[slot++] = anchor.getPackage().getName();
        }
        return lookupClasses(subType, packageNames);
    }
}

Register the subclass of GeneratedMessageV3 under a package into MessageTypeLookup

With the class scanning utility in place, the requirement of "registering the subclasses of GeneratedMessageV3 under a package into MessageTypeLookup" becomes very easy.

With ClassScanner we can get the Class objects of all GeneratedMessageV3 subclasses, but we also need the typeUrl of each. Because Message#getDescriptorForType() is an instance method, after getting the Class object of the required class you need to obtain an instance via reflection and then call getDescriptorForType() on it to get the typeUrl. Message classes are also immutable and all of their constructors are private, so instances can only be created through their Builder classes. Here we first call the generated static method newBuilder() through reflection to create a Builder, and the descriptor is then obtained by way of that Builder (for example, from the Message instance it builds). That completes all the work needed.

MessageTypeLookupUtil.java

package io.gitlab.donespeak.javatool.toolprotobuf.anydata;

import com.google.protobuf.GeneratedMessageV3;
import com.google.protobuf.Message;

import java.lang.reflect.InvocationTargetException;
import java.util.Set;

/**
 * Builds a {@link MessageTypeLookup} automatically by scanning packages for generated
 * message classes (subclasses of {@link GeneratedMessageV3}).
 */
public class MessageTypeLookupUtil {

    /**
     * Builds a lookup from all GeneratedMessageV3 subclasses found under the given packages.
     *
     * @param messageBasePackages package names to scan
     * @return a lookup with one mapping per discovered message class
     * @throws RuntimeException if a discovered class's static {@code newBuilder()} cannot
     *         be found or invoked
     */
    public static MessageTypeLookup getMessageTypeLookup(String... messageBasePackages) {

        // Here, GeneratedMessageV3 is used as the parent class to prevent classes like com.google.protobuf.AbstractMessage from appearing
        Set<Class<? extends GeneratedMessageV3>>
            klasses = ClassScanner.lookupClasses(GeneratedMessageV3.class, messageBasePackages);

        return generateMessageTypeLookup(klasses);
    }

    /** Registers every class in {@code klasses} under its descriptor's full name. */
    private static MessageTypeLookup generateMessageTypeLookup(Set<Class<? extends GeneratedMessageV3>> klasses) {
        MessageTypeLookup.Builder messageTypeLookupBuilder = MessageTypeLookup.newBuilder();
        for (Class<? extends GeneratedMessageV3> klass : klasses) {
            try {
                // Generated message classes expose a static newBuilder() factory.
                Message.Builder builder = (Message.Builder) klass.getMethod("newBuilder").invoke(null);
                // Read the descriptor from the builder itself: calling build() on an empty
                // builder throws for message types that declare required fields.
                messageTypeLookupBuilder.addMessageTypeMapping(builder.getDescriptorForType(), klass);
            } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
                // Not expected for generated message classes; name the class so that a
                // failure can be traced (the cause's own message may be null).
                throw new RuntimeException("Cannot obtain descriptor for " + klass.getName(), e);
            }
        }
        return messageTypeLookupBuilder.build();
    }

    /**
     * Builds a lookup from all GeneratedMessageV3 subclasses found in the packages that
     * contain the given classes.
     *
     * @param messageBasePackageClasses classes whose packages are scanned
     * @return a lookup with one mapping per discovered message class
     * @throws RuntimeException if a discovered class's static {@code newBuilder()} cannot
     *         be found or invoked
     */
    public static MessageTypeLookup getMessageTypeLookup(Class<?>... messageBasePackageClasses) {

        // Here, GeneratedMessageV3 is used as the parent class to prevent classes like com.google.protobuf.AbstractMessage from appearing
        Set<Class<? extends GeneratedMessageV3>>
            klasses = ClassScanner.lookupClasses(GeneratedMessageV3.class, messageBasePackageClasses);
        return generateMessageTypeLookup(klasses);
    }
}

References

Topics: Java Google GitLab github