Coverage for src/io_collection/load/load_dataframe.py: 100%

17 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-09-25 19:09 +0000

1from __future__ import annotations 

2 

3from pathlib import Path 

4 

5import pandas as pd 

6 

7from io_collection.load.load_buffer import _load_buffer_from_s3 

8 

9 

def load_dataframe(
    location: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Read the CSV object identified by key into a dataframe.

    A location that starts with the **s3://** protocol is treated as an S3
    bucket (the protocol prefix is stripped to get the bucket name); any other
    location is treated as a path on the local file system.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        forwarded to the selected loader, which passes them to
        `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.

    Raises
    ------
    ValueError
        If the key does not end in `.csv`.
    """

    # Reject unsupported keys before touching any storage backend.
    if not key.endswith(".csv"):
        raise ValueError(f"key [ {key} ] must have [ csv ] extension")

    s3_scheme = "s3://"
    on_s3 = location[: len(s3_scheme)] == s3_scheme

    if not on_s3:
        return _load_dataframe_from_fs(location, key, **kwargs)

    # Everything after the protocol prefix is the bucket name.
    bucket = location[len(s3_scheme) :]
    return _load_dataframe_from_s3(bucket, key, **kwargs)

42 

43 

def _load_dataframe_from_fs(
    path: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Read a CSV file from the local file system into a dataframe.

    The file read is the one at `path` joined with `key`.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.
    """

    return pd.read_csv(Path(path, key), **kwargs)

68 

69 

def _load_dataframe_from_s3(
    bucket: str, key: str, **kwargs: int | str | list | dict | bool
) -> pd.DataFrame:
    """
    Read a CSV object from an AWS S3 bucket into a dataframe.

    The object is fetched with `_load_buffer_from_s3` and the resulting
    buffer is parsed by `pandas.read_csv`.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.csv`.
    **kwargs
        Additional parameters for loading dataframe. The keyword arguments are
        passed to `pandas.read_csv`.

    Returns
    -------
    :
        Loaded dataframe.
    """

    return pd.read_csv(_load_buffer_from_s3(bucket, key), **kwargs)

93 return pd.read_csv(buffer, **kwargs)