Coverage for src/io_collection/load/load_dataframe.py: 100%
17 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-09-25 19:09 +0000
1from __future__ import annotations
3from pathlib import Path
5import pandas as pd
7from io_collection.load.load_buffer import _load_buffer_from_s3
def load_dataframe(
    location: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Load key as dataframe from specified location.

    Method will load from the S3 bucket if the location begins with the
    **s3://** protocol, otherwise it assumes the location is a local path.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.

    Raises
    ------
    ValueError
        If the key does not end in `.csv`.
    """

    # Validate the key first so an unsupported extension fails before either
    # loader is invoked.
    if not key.endswith(".csv"):
        message = f"key [ {key} ] must have [ csv ] extension"
        raise ValueError(message)

    s3_protocol = "s3://"

    if location.startswith(s3_protocol):
        # The S3 loader takes the location with the protocol prefix removed.
        bucket = location[len(s3_protocol) :]
        return _load_dataframe_from_s3(bucket, key, **kwargs)

    return _load_dataframe_from_fs(location, key, **kwargs)
def _load_dataframe_from_fs(
    path: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Load key as dataframe from local file system.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.
    """

    # The key is resolved relative to the given path.
    full_path = Path(path) / key
    return pd.read_csv(full_path, **kwargs)
def _load_dataframe_from_s3(
    bucket: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Load key as dataframe from AWS S3 bucket.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.
    """

    # Fetch the object through the shared buffer loader, then parse the
    # returned buffer as CSV.
    buffer = _load_buffer_from_s3(bucket, key)
    return pd.read_csv(buffer, **kwargs)