Coverage for src/cell_abm_pipeline/flows/download_images.py: 0%
45 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1"""
Workflow for downloading images from Quilt.

Image metadata is loaded from the Quilt package and used to filter for FOVs with
the specified number of cells. FOVs are selected to meet the specified number of
FOVs within each cell volume bin. For each selected FOV, the image is downloaded
to the working location under **images**.
8"""
10from dataclasses import dataclass
12from abm_initialization_collection.image import select_fov_images
13from io_collection.keys import check_key, make_key
14from io_collection.load import load_dataframe
15from io_collection.quilt import load_quilt_package, save_quilt_item
16from prefect import flow
@dataclass
class ParametersConfig:
    """Parameter configuration for download images flow."""

    cells_per_fov: int
    """Number of cells per FOV."""

    bins: list[int]
    """Cell volume bin boundaries."""

    counts: list[int]
    """Number of FOVs to select from each cell volume bin."""

    quilt_package: str = "aics/hipsc_single_cell_image_dataset"
    """Name of Quilt package."""

    quilt_registry: str = "s3://allencell"
    """Name of Quilt registry."""
@dataclass
class ContextConfig:
    """Context configuration for download images flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

    metadata_location: str
    """Location of metadata file (local path or S3 bucket)."""
@dataclass
class SeriesConfig:
    """Series configuration for download images flow."""

    name: str
    """Name of the simulation series."""

    metadata_key: str
    """Key for metadata file."""
@flow(name="download-images")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main download images flow.

    Loads the Quilt package named by the parameters. If the metadata key does
    not exist at the metadata location, the package's ``metadata.csv`` item is
    saved there first. The metadata is then loaded and passed, together with
    the cells-per-FOV, bin, and count parameters, to ``select_fov_images``.
    For each selected FOV, the key and cell ids are printed and the FOV item is
    saved under ``<series name>/images`` in the working location unless an
    object already exists at that key.
    """

    package = load_quilt_package(parameters.quilt_package, parameters.quilt_registry)

    # Only pull the metadata file out of the package if it is not already saved.
    metadata_exists = check_key(context.metadata_location, series.metadata_key)
    if not metadata_exists:
        save_quilt_item(context.metadata_location, series.metadata_key, package, "metadata.csv")

    # Restrict the load to the listed columns.
    metadata = load_dataframe(
        context.metadata_location,
        series.metadata_key,
        usecols=[
            "CellId",
            "cell_stage",
            "outlier",
            "fov_seg_path",
            "this_cell_index",
            "MEM_shape_volume",
        ],
    )

    selected_fovs = select_fov_images(
        metadata, parameters.cells_per_fov, parameters.bins, parameters.counts
    )

    for fov in selected_fovs:
        include_ids = ", ".join(str(cell_id) for cell_id in fov["cell_ids"])
        print(f"key: {fov['key']}")
        print(f"include_ids: {include_ids}")

        fov_key = make_key(series.name, "images", f"{series.name}_{fov['key']}.ome.tiff")
        image_exists = check_key(context.working_location, fov_key)

        # Skip images that were already downloaded on a previous run.
        if not image_exists:
            # NOTE(review): ``.submit`` presumably schedules this as a Prefect
            # task run; confirm against io_collection.quilt.
            save_quilt_item.submit(context.working_location, fov_key, package, fov["item"])