Coverage for src/io_collection/save/save_dataframe.py: 100%

20 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-09-25 19:09 +0000

1from __future__ import annotations 

2 

3import io 

4from pathlib import Path 

5from typing import TYPE_CHECKING 

6 

7from io_collection.save.save_buffer import _save_buffer_to_s3 

8 

9if TYPE_CHECKING: 

10 import pandas as pd 

11 

12 

13def save_dataframe( 

14 location: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool 

15) -> None: 

16 """ 

17 Save dataframe to key at specified location. 

18 

19 Method will save to the S3 bucket if the location begins with the **s3://** 

20 protocol, otherwise it assumes the location is a local path. 

21 

22 Parameters 

23 ---------- 

24 location 

25 Object location (local path or S3 bucket). 

26 key 

27 Object key ending in `.csv`. 

28 dataframe 

29 Dataframe to save. 

30 **kwargs 

31 Additional parameters for saving dataframe. The keyword arguments are 

32 passed to `pandas.to_csv`. 

33 """ 

34 

35 if not key.endswith(".csv"): 

36 message = f"key [ {key} ] must have [ csv ] extension" 

37 raise ValueError(message) 

38 

39 if location[:5] == "s3://": 

40 _save_dataframe_to_s3(location[5:], key, dataframe, **kwargs) 

41 else: 

42 _save_dataframe_to_fs(location, key, dataframe, **kwargs) 

43 

44 

45def _save_dataframe_to_fs( 

46 path: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool 

47) -> None: 

48 """ 

49 Save dataframe to key on local file system. 

50 

51 Parameters 

52 ---------- 

53 path 

54 Local object path. 

55 key 

56 Object key ending in `.csv`. 

57 dataframe 

58 Dataframe to save. 

59 **kwargs 

60 Additional parameters for saving dataframe. The keyword arguments are 

61 passed to `pandas.to_csv`. 

62 """ 

63 

64 full_path = Path(path) / key 

65 full_path.parent.mkdir(parents=True, exist_ok=True) 

66 dataframe.to_csv(full_path, **kwargs) 

67 

68 

def _save_dataframe_to_s3(
    bucket: str, key: str, dataframe: pd.DataFrame, **kwargs: int | str | list | dict | bool
) -> None:
    """
    Save dataframe to key in AWS S3 bucket.

    The dataframe is serialized as CSV into an in-memory buffer, which is then
    handed to `_save_buffer_to_s3` with the `text/csv` content type.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.csv`.
    dataframe
        Dataframe to save.
    **kwargs
        Additional parameters for saving dataframe. The keyword arguments are
        passed to `pandas.DataFrame.to_csv`.
    """

    with io.BytesIO() as buffer:
        dataframe.to_csv(buffer, **kwargs)

        # Writing leaves the stream position at the end of the data. Rewind so
        # the full payload is available even if the upload helper reads from
        # the current position (no effect if it uses `getvalue()`).
        # NOTE(review): helper's read strategy is not visible here - confirm.
        buffer.seek(0)

        _save_buffer_to_s3(bucket, key, buffer, "text/csv")

90 _save_buffer_to_s3(bucket, key, buffer, "text/csv")