Coverage for src/cell_abm_pipeline/flows/calculate_image_properties.py: 0%
52 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
"""
Workflow for calculating shape properties from existing images.

Working location structure:

.. code-block:: bash

    (name)
    ├── results
    │   └── (name)_(key)_(seed).csv
    └── calculations
        └── calculations.PROPERTIES
            ├── (name)_(key)_(seed)_(tick).PROPERTIES.csv
            └── (name)_(key)_(seed)_(tick)_(region).PROPERTIES.csv

Data from **results** are used to specify existing images, which are then used
to calculate properties. Calculations are saved to **calculations.PROPERTIES**.

If region is specified, the region is included in the output key.
"""
22from dataclasses import dataclass, field
23from typing import Optional
25import pandas as pd
26from abm_shape_collection import get_shape_properties
27from io_collection.keys import make_key
28from io_collection.load import load_dataframe, load_image
29from io_collection.save import save_dataframe
30from prefect import flow
32from cell_abm_pipeline.flows.calculate_properties import SHAPE_PROPERTIES
@dataclass
class ParametersConfig:
    """Parameter configuration for calculate image properties flow."""

    key: str
    """Simulation key to calculate."""

    seed: int
    """Simulation random seed to calculate."""

    tick: int
    """Simulation tick to calculate."""

    channel: int
    """Index of channel to calculate."""

    region: Optional[str] = None
    """Subcellular region; when set, it is included in the output key."""

    # Hand each instance its own copy of the default list: returning the
    # module-level SHAPE_PROPERTIES object itself would let an in-place change
    # on one instance leak into the constant and into every later default.
    properties: list[str] = field(default_factory=lambda: list(SHAPE_PROPERTIES))
    """List of shape properties to calculate."""
@dataclass
class ContextConfig:
    """Execution context for the calculate image properties flow."""

    working_location: str
    """Root location (local path or S3 bucket) that inputs are read from and outputs written to."""
@dataclass
class SeriesConfig:
    """Simulation series selection for the calculate image properties flow."""

    name: str
    """Simulation series name, used as the prefix of input and output keys."""
@flow(name="calculate-image-properties")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main calculate image properties flow.

    Loads the results table for the selected key and seed from the working
    location, calculates the requested shape properties for every image listed
    at the selected tick, and saves the combined table to the
    **calculations.PROPERTIES** group. The region, when given, only affects the
    name of the saved file.
    """

    calc_key = make_key(series.name, "calculations", "calculations.PROPERTIES")
    series_key = f"{series.name}_{parameters.key}_{parameters.seed:04d}"

    results_key = make_key(series.name, "results", f"{series_key}.csv")
    results = load_dataframe(context.working_location, results_key)

    # Keep only the (ID, IMAGE) pairs recorded at the requested tick.
    at_tick = results["TICK"] == parameters.tick
    id_image_pairs = results[at_tick][["ID", "IMAGE"]].values

    rows = []

    for cell_id, image_file in id_image_pairs:
        image = load_image("s3://allencell", f"aics/hipsc_single_cell_image_dataset/{image_file}")
        array = image.get_image_data("ZYX", T=0, C=parameters.channel)

        # Tag the calculated properties with the identifiers of their source.
        row = get_shape_properties(array, parameters.properties)
        row["KEY"] = parameters.key
        row["ID"] = cell_id
        row["SEED"] = parameters.seed
        row["TICK"] = parameters.tick

        rows.append(row)

    suffix = "" if parameters.region is None else f"_{parameters.region}"
    props_key = make_key(calc_key, f"{series_key}_{parameters.tick:06d}{suffix}.PROPERTIES.csv")

    save_dataframe(context.working_location, props_key, pd.DataFrame(rows), index=False)