Coverage for src/io_collection/load/load_tar.py: 100%
16 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-09-25 19:09 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-09-25 19:09 +0000
1import tarfile
2from pathlib import Path
4from io_collection.load.load_buffer import _load_buffer_from_s3
def load_tar(location: str, key: str) -> tarfile.TarFile:
    """
    Open the tar archive stored under the given key at the given location.

    A location starting with the **s3://** protocol is treated as an S3
    bucket; any other location is treated as a path on the local file system.

    Only `xz` compressed archives are currently supported.

    Parameters
    ----------
    location
        Object location (local path or S3 bucket).
    key
        Object key ending in `.tar.xz`.

    Returns
    -------
    :
        Loaded tar archive.

    Raises
    ------
    ValueError
        If the key does not end in `.tar.xz`.
    """

    required_suffix = ".tar.xz"
    s3_scheme = "s3://"

    # Reject unsupported keys up front, before touching any storage backend.
    if not key.endswith(required_suffix):
        message = f"key [ {key} ] must have [ tar.xz ] extension"
        raise ValueError(message)

    if location.startswith(s3_scheme):
        # Strip the protocol so that only the bucket name is passed along.
        bucket = location[len(s3_scheme) :]
        return _load_tar_from_s3(bucket, key)

    return _load_tar_from_fs(location, key)
def _load_tar_from_fs(path: str, key: str) -> tarfile.TarFile:
    """
    Open the tar archive stored under the given key on the local file system.

    Parameters
    ----------
    path
        Local object path.
    key
        Object key ending in `.tar.xz`.

    Returns
    -------
    :
        Loaded tar archive.
    """

    # The archive lives at the key resolved relative to the local path.
    archive_path = Path(path).joinpath(key)
    return tarfile.open(name=archive_path, mode="r:xz")
def _load_tar_from_s3(bucket: str, key: str) -> tarfile.TarFile:
    """
    Open the tar archive stored under the given key in an AWS S3 bucket.

    The object contents are obtained from `_load_buffer_from_s3` and handed
    to `tarfile` as a file object.

    Parameters
    ----------
    bucket
        AWS S3 bucket name.
    key
        Object key ending in `.tar.xz`.

    Returns
    -------
    :
        Loaded tar archive.
    """

    contents = _load_buffer_from_s3(bucket, key)
    return tarfile.open(mode="r:xz", fileobj=contents)