Source code for io_collection.load.load_dataframe
from __future__ import annotations
from pathlib import Path
import pandas as pd
from io_collection.load.load_buffer import _load_buffer_from_s3
[docs]
def load_dataframe(
    location: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Read a CSV object into a dataframe from a local path or an S3 bucket.

    A location that starts with the **s3://** protocol is treated as an S3
    bucket; any other location is treated as a local path.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.

    Raises
    ------
    ValueError
        If the key does not end in `.csv`.
    """
    s3_protocol = "s3://"

    # Validate the extension first so no loader is called for a bad key.
    if not key.endswith(".csv"):
        error_text = f"key [ {key} ] must have [ csv ] extension"
        raise ValueError(error_text)

    if location.startswith(s3_protocol):
        # Strip the protocol so only the bucket name is handed to the loader.
        bucket = location[len(s3_protocol):]
        return _load_dataframe_from_s3(bucket, key, **kwargs)

    return _load_dataframe_from_fs(location, key, **kwargs)
def _load_dataframe_from_fs(
    path: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Read a CSV file on the local file system into a dataframe.

    The file read is the one at `key` joined onto `path`.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.
    """
    csv_file = Path(path).joinpath(key)
    return pd.read_csv(csv_file, **kwargs)
def _load_dataframe_from_s3(
    bucket: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Read a CSV object stored in an AWS S3 bucket into a dataframe.

    The object is fetched with `_load_buffer_from_s3` and the resulting
    buffer is parsed with `pandas.read_csv`.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.
    """
    return pd.read_csv(_load_buffer_from_s3(bucket, key), **kwargs)