Source code for io_collection.load.load_buffer

import io
from pathlib import Path

import boto3

# Maximum number of bytes read from S3 in a single call (about 2 GiB).
MAX_CONTENT_LENGTH = 2**31 - 1


def load_buffer(location: str, key: str) -> io.BytesIO:
    """
    Load key into in-memory bytes buffer from specified location.

    The method loads from an S3 bucket if the location begins with the
    **s3://** protocol; otherwise, it assumes the location is a local path.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key.

    Returns
    -------
    :
        Loaded object buffer.
    """

    if location[:5] == "s3://":
        return _load_buffer_from_s3(location[5:], key)

    return _load_buffer_from_fs(location, key)
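
A minimal usage sketch, assuming a local directory and an S3 bucket that both hold the same object (the paths, bucket name, and key below are placeholders, not part of the library):

# Hypothetical locations; substitute real paths, buckets, and keys.
local_buffer = load_buffer("/data/objects", "example.bin")
s3_buffer = load_buffer("s3://example-bucket", "example.bin")

# Both calls return an in-memory, seekable io.BytesIO object.
assert local_buffer.read() == s3_buffer.read()
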
def _load_buffer_from_fs(path: str, key: str) -> io.BytesIO:
    """
    Load key into in-memory bytes buffer from local file system.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key.

    Returns
    -------
    :
        Loaded object buffer.
    """

    full_path = Path(path) / key

    with full_path.open("rb") as fileobj:
        return io.BytesIO(fileobj.read())


def _load_buffer_from_s3(bucket: str, key: str) -> io.BytesIO:
    """
    Load key into in-memory bytes buffer from AWS S3 bucket.

    Objects larger than `MAX_CONTENT_LENGTH` are loaded in chunks.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key.

    Returns
    -------
    :
        Loaded object buffer.
    """

    s3_client = boto3.client("s3")
    obj = s3_client.get_object(Bucket=bucket, Key=key)

    # Check if body needs to be loaded in chunks.
    content_length = obj["ContentLength"]

    if content_length > MAX_CONTENT_LENGTH:
        body = bytearray()

        for chunk in obj["Body"].iter_chunks(chunk_size=MAX_CONTENT_LENGTH):
            body += chunk

        return io.BytesIO(body)

    return io.BytesIO(obj["Body"].read())
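
One way to exercise the S3 branch without network access is to stub the client with botocore's Stubber and patch boto3.client so that _load_buffer_from_s3 receives the stub. This is a hedged sketch, not part of the project's test suite; the bucket and key names are illustrative only:

import io
from unittest.mock import patch

import boto3
from botocore.response import StreamingBody
from botocore.stub import Stubber

payload = b"example payload"

# Build a client whose get_object response is canned rather than fetched.
stubbed_client = boto3.client("s3", region_name="us-east-1")
stubber = Stubber(stubbed_client)
stubber.add_response(
    "get_object",
    {"Body": StreamingBody(io.BytesIO(payload), len(payload)), "ContentLength": len(payload)},
    {"Bucket": "example-bucket", "Key": "example.bin"},
)

# Patch boto3.client so the helper picks up the stubbed client.
with stubber, patch("boto3.client", return_value=stubbed_client):
    buffer = _load_buffer_from_s3("example-bucket", "example.bin")

assert buffer.read() == payload
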