# Source code for io_collection.save.save_dataframe

from __future__ import annotations

import io
from pathlib import Path
from typing import TYPE_CHECKING

from io_collection.save.save_buffer import _save_buffer_to_s3

if TYPE_CHECKING:
    import pandas as pd


def save_dataframe(
    location: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Save dataframe to key at specified location.

    Method will save to the S3 bucket if the location begins with the
    **s3://** protocol, otherwise it assumes the location is a local path.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments
        are passed to `pandas.to_csv`.

    Raises
    ------
    ValueError
        If the key does not end with a `.csv` extension.
    """

    if not key.endswith(".csv"):
        message = f"key [ {key} ] must have [ csv ] extension"
        raise ValueError(message)

    # Idiomatic prefix check (was a slice comparison); strip the "s3://"
    # protocol before delegating so the helper receives the bare bucket name.
    if location.startswith("s3://"):
        _save_dataframe_to_s3(location[5:], key, dataframe, **kwargs)
    else:
        _save_dataframe_to_fs(location, key, dataframe, **kwargs)
def _save_dataframe_to_fs(
    path: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Save dataframe to key on local file system.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments
        are passed to `pandas.to_csv`.
    """

    target = Path(path, key)
    # Create any missing parent directories before writing the file.
    target.parent.mkdir(parents=True, exist_ok=True)
    dataframe.to_csv(target, **kwargs)


def _save_dataframe_to_s3(
    bucket: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Save dataframe to key in AWS S3 bucket.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments
        are passed to `pandas.to_csv`.
    """

    with io.BytesIO() as stream:
        dataframe.to_csv(stream, **kwargs)
        # NOTE(review): the buffer is handed off without seeking back to 0 —
        # presumably _save_buffer_to_s3 rewinds or reads via getvalue(); confirm.
        _save_buffer_to_s3(bucket, key, stream, "text/csv")