Coverage for src/io_collection/load/load_tar.py: 100%

16 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-09-25 19:09 +0000

1import tarfile 

2from pathlib import Path 

3 

4from io_collection.load.load_buffer import _load_buffer_from_s3 

5 

6 

def load_tar(location: str, key: str) -> tarfile.TarFile:
    """
    Open the object at ``key`` as a tar archive.

    The location decides where the archive is read from: a location that
    starts with the **s3://** protocol is treated as an S3 bucket, and any
    other location is treated as a path on the local file system.

    Only `xz` compressed archives are supported.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.tar.xz`.

    Returns
    -------
    :
        Loaded tar archive.

    Raises
    ------
    ValueError
        If the key does not end with the `.tar.xz` extension.
    """

    s3_protocol = "s3://"

    # The extension is validated before the location is inspected at all.
    if key.endswith(".tar.xz"):
        if location.startswith(s3_protocol):
            # Strip the protocol so only the bucket name is passed along.
            bucket = location[len(s3_protocol) :]
            return _load_tar_from_s3(bucket, key)

        return _load_tar_from_fs(location, key)

    message = f"key [ {key} ] must have [ tar.xz ] extension"
    raise ValueError(message)

36 

37 

def _load_tar_from_fs(path: str, key: str) -> tarfile.TarFile:
    """
    Open an `xz` compressed tar archive stored on the local file system.

    The archive is opened for reading and returned still open.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.tar.xz`.

    Returns
    -------
    :
        Loaded tar archive.
    """

    archive_path = Path(path).joinpath(key)
    archive = tarfile.open(name=archive_path, mode="r:xz")
    return archive

57 

58 

def _load_tar_from_s3(bucket: str, key: str) -> tarfile.TarFile:
    """
    Open an `xz` compressed tar archive stored in an AWS S3 bucket.

    The object contents are first loaded into a buffer, and the archive is
    then opened for reading from that buffer.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.tar.xz`.

    Returns
    -------
    :
        Loaded tar archive.
    """

    contents = _load_buffer_from_s3(bucket, key)
    archive = tarfile.open(mode="r:xz", fileobj=contents)
    return archive