Coverage for src/io_collection/save/save_dataframe.py: 100%
20 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-09-25 19:09 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-09-25 19:09 +0000
1from __future__ import annotations
3import io
4from pathlib import Path
5from typing import TYPE_CHECKING
7from io_collection.save.save_buffer import _save_buffer_to_s3
9if TYPE_CHECKING:
10 import pandas as pd
def save_dataframe(
    location: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Save dataframe to key at specified location.

    Method will save to the S3 bucket if the location begins with the **s3://**
    protocol, otherwise it assumes the location is a local path.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments are
        passed to `pandas.to_csv`.

    Raises
    ------
    ValueError
        If the key does not end in `.csv`.
    """

    # Validate the key first so nothing is written for an unsupported extension.
    if not key.endswith(".csv"):
        message = f"key [ {key} ] must have [ csv ] extension"
        raise ValueError(message)

    # Keep the protocol in one place so the prefix test and the slice that
    # removes it cannot drift apart.
    s3_protocol = "s3://"

    if location.startswith(s3_protocol):
        # Only the bucket name (location without the protocol) is passed on.
        _save_dataframe_to_s3(location[len(s3_protocol) :], key, dataframe, **kwargs)
    else:
        _save_dataframe_to_fs(location, key, dataframe, **kwargs)
def _save_dataframe_to_fs(
    path: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Write dataframe as a CSV file on the local file system.

    The target file is the key joined onto the given path. Any missing parent
    directories of the target file are created before writing.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments are
        passed to `pandas.to_csv`.
    """

    target = Path(path, key)

    # The key may contain sub-directories, so create the file's parent
    # directory tree (no error if it already exists).
    target.parent.mkdir(parents=True, exist_ok=True)

    dataframe.to_csv(target, **kwargs)
def _save_dataframe_to_s3(
    bucket: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Write dataframe as CSV into an in-memory buffer and save it to AWS S3.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments are
        passed to `pandas.to_csv`.
    """

    content_type = "text/csv"

    with io.BytesIO() as csv_buffer:
        dataframe.to_csv(csv_buffer, **kwargs)

        # Hand the buffer off while it is still open; it is closed as soon as
        # the `with` block exits.
        _save_buffer_to_s3(bucket, key, csv_buffer, content_type)