[Yugong series] January 2022 MinIO file storage server - object operation (Python version)

Posted by wolfrock on Sun, 30 Jan 2022 12:10:02 +0100

What is an object?

An object is the basic unit in which MinIO stores data; it is also known as a MinIO file. An object consists of object metadata (Object Meta), user data, and a file name (Key). Within the storage space (bucket), each object is identified by a unique Key. Object Meta is a set of key-value pairs that describe attributes of the object, such as its last modification time and size. You can also store custom information in the object metadata.

Objects are stored in a Bucket.

1, Object operation

1. Acquisition of object data

1.1 Get the data of an object, optionally starting at an offset and limited to a length

# Get the data of an object.
# `response` is pre-set to None so that, if get_object() itself raises,
# the cleanup below does not fail with a NameError (or touch a response
# left over from an earlier call) and hide the original error.
response = None
try:
    response = client.get_object("my-bucket", "my-object")
    # Read data from the response.
finally:
    if response is not None:
        response.close()
        response.release_conn()

# Get the data of a specific version of an object.
response = None
try:
    response = client.get_object(
        "my-bucket", "my-object",
        version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
    )
    # Read data from the response.
finally:
    if response is not None:
        response.close()
        response.release_conn()

# Get part of an object's data, selected by offset and length.
response = None
try:
    response = client.get_object(
        "my-bucket", "my-object", offset=512, length=1024,
    )
    # Read data from the response.
finally:
    if response is not None:
        response.close()
        response.release_conn()

# Get the data of an SSE-C encrypted object.
response = None
try:
    response = client.get_object(
        "my-bucket", "my-object",
        ssec=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
    )
    # Read data from the response.
finally:
    if response is not None:
        response.close()
        response.release_conn()

1.2 select the content of the object through SQL expression

# Describe the query: select every record of the CSV object, with CSV
# input/output serialization and progress reporting requested.
request = SelectRequest(
    "select * from S3Object",
    CSVInputSerialization(),
    CSVOutputSerialization(),
    request_progress=True,
)

with client.select_object_content(
        "my-bucket", "my-object.csv", request,
) as result:
    # Print each streamed chunk as text, then the statistics of the query.
    for chunk in result.stream():
        print(chunk.decode())
    print(result.stats())

1.3 get the object information and metadata of the object

# Get the information of an object and report its modification time and size.
result = client.stat_object("my-bucket", "my-object")
summary = "last-modified: {0}, size: {1}".format(
    result.last_modified, result.size,
)
print(summary)

# Get the information of a specific version of an object.
result = client.stat_object(
    "my-bucket",
    "my-object",
    version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
)
summary = "last-modified: {0}, size: {1}".format(
    result.last_modified, result.size,
)
print(summary)

# Get the information of an SSE-C encrypted object.
result = client.stat_object(
    "my-bucket",
    "my-object",
    ssec=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
summary = "last-modified: {0}, size: {1}".format(
    result.last_modified, result.size,
)
print(summary)

2. Object creation

2.1 download the data of the object to the file

# Download the data of an object into the local file "my-filename".
client.fget_object(
    "my-bucket",
    "my-object",
    "my-filename",
)

# Download the data of a specific version of an object.
client.fget_object(
    "my-bucket",
    "my-object",
    "my-filename",
    version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
)

# Download the data of an SSE-C encrypted object.
client.fget_object(
    "my-bucket",
    "my-object",
    "my-filename",
    ssec=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)

2.2 copy object data

from datetime import datetime, timezone
from minio.commonconfig import REPLACE, CopySource

# Copy an object from one bucket to another.
source = CopySource("my-sourcebucket", "my-sourceobject")
result = client.copy_object("my-bucket", "my-object", source)
print(result.object_name, result.version_id)

# Copy an object with a condition: the source carries a `modified_since`
# timestamp of 2014-04-01 (UTC).
conditional_source = CopySource(
    "my-sourcebucket",
    "my-sourceobject",
    modified_since=datetime(2014, 4, 1, tzinfo=timezone.utc),
)
result = client.copy_object("my-bucket", "my-object", conditional_source)
print(result.object_name, result.version_id)

# Copy an object, supplying new metadata with the REPLACE directive.
metadata = {"test_meta_key": "test_meta_value"}
replace_source = CopySource("my-sourcebucket", "my-sourceobject")
result = client.copy_object(
    "my-bucket",
    "my-object",
    replace_source,
    metadata=metadata,
    metadata_directive=REPLACE,
)
print(result.object_name, result.version_id)

2.3 Create an object by combining (composing) data from several source objects

from minio.commonconfig import ComposeSource
from minio.sse import SseS3

# The source objects whose data is combined, in this order, into the
# destination object.
sources = [
    ComposeSource("my-job-bucket", "my-object-part-one"),
    ComposeSource("my-job-bucket", "my-object-part-two"),
    ComposeSource("my-job-bucket", "my-object-part-three"),
]

# Create my-bucket/my-object by combining the source object list.
result = client.compose_object("my-bucket", "my-object", sources)
print(result.object_name, result.version_id)

# Create my-bucket/my-object with user metadata by combining the
# source object list.
result = client.compose_object(
    "my-bucket",
    "my-object",
    sources,
    metadata={"test_meta_key": "test_meta_value"},
)
print(result.object_name, result.version_id)

# Create my-bucket/my-object with server-side encryption by combining the
# source object list. The return value must be assigned to `result`;
# otherwise the print below would report the previous call's result.
result = client.compose_object(
    "my-bucket", "my-object", sources, sse=SseS3(),
)
print(result.object_name, result.version_id)

2.4 uploading local data streams to objects

# Upload data from an in-memory stream of known size (5 bytes).
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data of unknown size (length=-1) in parts of 10 MiB.
# The HTTP response is opened as a context manager so the connection is
# closed when the upload finishes or fails, instead of being leaked.
with urlopen(
        "https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.81.tar.xz",
) as data:
    result = client.put_object(
        "my-bucket", "my-object", data, length=-1, part_size=10*1024*1024,
    )
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with a content type of application/csv.
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    content_type="application/csv",
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with user metadata.
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    metadata={"My-Project": "one"},
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with server-side encryption using a customer-provided key.
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    sse=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with KMS type server-side encryption.
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    sse=SseKMS("KMS-KEY-ID", {"Key1": "Value1", "Key2": "Value2"}),
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with S3 type server-side encryption.
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    sse=SseS3(),
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with tags, retention and legal hold.
# The retention date is midnight (UTC clock) 30 days from today.
date = datetime.utcnow().replace(
    hour=0, minute=0, second=0, microsecond=0,
) + timedelta(days=30)
tags = Tags(for_object=True)
tags["User"] = "jsmith"
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    tags=tags,
    retention=Retention(GOVERNANCE, date),
    legal_hold=True,
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

# Upload data with a progress bar.
result = client.put_object(
    "my-bucket", "my-object", io.BytesIO(b"hello"), 5,
    progress=Progress(),
)
print(
    "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
    ),
)

2.5 upload the data in the file to the object in the bucket

# Upload the contents of the local file "my-filename".
result = client.fput_object("my-bucket", "my-object", "my-filename")
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with a content type of application/csv.
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    content_type="application/csv",
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with user metadata.
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    metadata={"My-Project": "one"},
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with server-side encryption using a customer-provided key.
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    sse=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with KMS type server-side encryption.
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    sse=SseKMS("KMS-KEY-ID", {"Key1": "Value1", "Key2": "Value2"}),
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with S3 type server-side encryption.
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    sse=SseS3(),
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with tags, retention and legal hold.
# The retention date is midnight (UTC clock) 30 days from today.
midnight = datetime.utcnow().replace(
    hour=0, minute=0, second=0, microsecond=0,
)
date = midnight + timedelta(days=30)
tags = Tags(for_object=True)
tags["User"] = "jsmith"
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    tags=tags,
    retention=Retention(GOVERNANCE, date),
    legal_hold=True,
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

# Upload the file with a progress bar.
result = client.fput_object(
    "my-bucket",
    "my-object",
    "my-filename",
    progress=Progress(),
)
summary = "created {0} object; etag: {1}, version-id: {2}".format(
    result.object_name, result.etag, result.version_id,
)
print(summary)

3. Object deletion

3.1 removing an object

# Remove an object.
client.remove_object(
    "my-bucket",
    "my-object",
)

# Remove a specific version of an object.
client.remove_object(
    "my-bucket",
    "my-object",
    version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
)

3.2 removing multiple objects

# Remove a list of objects; the third entry names a specific version.
errors = client.remove_objects(
    "my-bucket",
    [
        DeleteObject("my-object1"),
        DeleteObject("my-object2"),
        DeleteObject("my-object3", "13f88b18-8dcd-4c83-88f2-8631fdb6250c"),
    ],
)
# Report every error returned for the request.
for error in errors:
    print("error occured when deleting object", error)

# Remove everything under a prefix recursively: each listed object is
# lazily wrapped in a DeleteObject as remove_objects() consumes the iterable.
delete_object_list = (
    DeleteObject(obj.object_name)
    for obj in client.list_objects("my-bucket", "my/prefix/", recursive=True)
)
errors = client.remove_objects("my-bucket", delete_object_list)
for error in errors:
    print("error occured when deleting object", error)

2, Object tag configuration

1. Delete the tag configuration of the object

# Delete the tag configuration of my-object in my-bucket.
client.delete_object_tags("my-bucket", "my-object")

2. Get the tag configuration of the object

# Get the tag configuration of my-object in my-bucket.
tags = client.get_object_tags("my-bucket", "my-object")

3. Set the tag configuration of the object

# Build a new object tag set with two entries, then apply it to
# my-object in my-bucket.
tags = Tags.new_object_tags()
tags["Project"] = "Project One"
tags["User"] = "jsmith"
client.set_object_tags("my-bucket", "my-object", tags)

3, Object legal hold and retention

1. Disable legal hold on an object

# Disable legal hold on my-object in my-bucket.
client.disable_object_legal_hold("my-bucket", "my-object")

2. Enable legal hold on an object

# Enable legal hold on my-object in my-bucket.
client.enable_object_legal_hold("my-bucket", "my-object")

3. Check whether legal hold is enabled on an object

# Report whether legal hold is currently enabled on my-object.
if not client.is_object_legal_hold_enabled("my-bucket", "my-object"):
    print("legal hold is not enabled on my-object")
else:
    print("legal hold is enabled on my-object")

4. Get the retention configuration of the object

config = client.get_object_retention("my-bucket", "my-object")

5. Set the retention configuration of the object

# Build a GOVERNANCE retention whose date is ten days from now (UTC clock),
# then apply it to my-object in my-bucket.
retain_until = datetime.utcnow() + timedelta(days=10)
config = Retention(GOVERNANCE, retain_until)
client.set_object_retention(
    "my-bucket",
    "my-object",
    config,
)

4, Object pre signature URL

1. Get a presigned URL of an object to download its data, with expiration time and custom request parameters

# Get a presigned URL string to download 'my-object' in 'my-bucket'
# with the default expiry (i.e. 7 days).
url = client.presigned_get_object(
    "my-bucket",
    "my-object",
)
print(url)

# Get a presigned URL string to download 'my-object' in 'my-bucket'
# with a two-hour expiry.
url = client.presigned_get_object(
    "my-bucket",
    "my-object",
    expires=timedelta(hours=2),
)
print(url)

2. Get a presigned URL of an object to upload its data, with expiration time and custom request parameters

# Get a presigned URL string to upload data to 'my-object' in 'my-bucket'
# with the default expiry (i.e. 7 days).
url = client.presigned_put_object(
    "my-bucket",
    "my-object",
)
print(url)

# Get a presigned URL string to upload data to 'my-object' in 'my-bucket'
# with a two-hour expiry.
url = client.presigned_put_object(
    "my-bucket",
    "my-object",
    expires=timedelta(hours=2),
)
print(url)

5, Object PostPolicy

1. Get the form data of the object PostPolicy to upload its data using the POST method

# Build a post policy for my-bucket with a date ten days from now
# (UTC clock) -- presumably the policy expiration; confirm against the
# PostPolicy API.
policy_date = datetime.utcnow() + timedelta(days=10)
policy = PostPolicy("my-bucket", policy_date)

# Condition on "key" starting with the given prefix, and on a content
# length range of 1 MiB to 10 MiB.
policy.add_starts_with_condition("key", "my/object/prefix/")
policy.add_content_length_range_condition(1*1024*1024, 10*1024*1024)

# Obtain the form data for the POST upload.
form_data = client.presigned_post_policy(policy)

6, HTTP method

1. Get a presigned URL of an object for a given HTTP method, with expiration time and custom request parameters

# Get a presigned URL string to delete 'my-object' in 'my-bucket'
# with a one-day expiry.
one_day = timedelta(days=1)
url = client.get_presigned_url(
    "DELETE", "my-bucket", "my-object", expires=one_day,
)
print(url)

# Get a presigned URL string to upload 'my-object' in 'my-bucket'
# with a response-content-type of application/json and a one-day expiry.
json_response = {"response-content-type": "application/json"}
url = client.get_presigned_url(
    "PUT", "my-bucket", "my-object",
    expires=timedelta(days=1),
    response_headers=json_response,
)
print(url)

# Get a presigned URL string to download 'my-object' in 'my-bucket'
# with a two-hour expiry.
two_hours = timedelta(hours=2)
url = client.get_presigned_url(
    "GET", "my-bucket", "my-object", expires=two_hours,
)
print(url)

Topics: Python Operation & Maintenance server