Source code for io_collection.load.load_buffer

import io
from pathlib import Path

import boto3

# Maximum number of bytes read from S3 in a single call (about 2 GiB).
MAX_CONTENT_LENGTH = 2**31 - 1


def load_buffer(location: str, key: str) -> io.BytesIO:
    """
    Load key into in-memory bytes buffer from specified location.

    The method loads from an S3 bucket if the location begins with the
    **s3://** protocol; otherwise, it assumes the location is a local path.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key.

    Returns
    -------
    :
        Loaded object buffer.
    """

    if location[:5] == "s3://":
        return _load_buffer_from_s3(location[5:], key)

    return _load_buffer_from_fs(location, key)
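
A minimal usage sketch, assuming a local directory and an S3 bucket that both hold the same object (the paths, bucket name, and key below are placeholders, not part of the library):

# Hypothetical locations; substitute real paths, buckets, and keys.
local_buffer = load_buffer("/data/objects", "example.bin")
s3_buffer = load_buffer("s3://example-bucket", "example.bin")

# Both calls return an in-memory, seekable io.BytesIO object.
assert local_buffer.read() == s3_buffer.read()
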
def _load_buffer_from_fs(path: str, key: str) -> io.BytesIO:
    """
    Load key into in-memory bytes buffer from local file system.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key.

    Returns
    -------
    :
        Loaded object buffer.
    """

    full_path = Path(path) / key

    with full_path.open("rb") as fileobj:
        return io.BytesIO(fileobj.read())


def _load_buffer_from_s3(bucket: str, key: str) -> io.BytesIO:
    """
    Load key into in-memory bytes buffer from AWS S3 bucket.

    Objects larger than `MAX_CONTENT_LENGTH` are loaded in chunks.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key.

    Returns
    -------
    :
        Loaded object buffer.
    """

    s3_client = boto3.client("s3")
    obj = s3_client.get_object(Bucket=bucket, Key=key)

    # Check if body needs to be loaded in chunks.
    content_length = obj["ContentLength"]

    if content_length > MAX_CONTENT_LENGTH:
        body = bytearray()

        for chunk in obj["Body"].iter_chunks(chunk_size=MAX_CONTENT_LENGTH):
            body += chunk

        return io.BytesIO(body)

    return io.BytesIO(obj["Body"].read())
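
One way to exercise the S3 branch without network access is to stub the client with botocore's Stubber and patch boto3.client so that _load_buffer_from_s3 receives the stub. This is a hedged sketch, not part of the project's test suite; the bucket and key names are illustrative only:

import io
from unittest.mock import patch

import boto3
from botocore.response import StreamingBody
from botocore.stub import Stubber

payload = b"example payload"

# Build a client whose get_object response is canned rather than fetched.
stubbed_client = boto3.client("s3", region_name="us-east-1")
stubber = Stubber(stubbed_client)
stubber.add_response(
    "get_object",
    {"Body": StreamingBody(io.BytesIO(payload), len(payload)), "ContentLength": len(payload)},
    {"Bucket": "example-bucket", "Key": "example.bin"},
)

# Patch boto3.client so the helper picks up the stubbed client.
with stubber, patch("boto3.client", return_value=stubbed_client):
    buffer = _load_buffer_from_s3("example-bucket", "example.bin")

assert buffer.read() == payload
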