Coverage for src/cell_abm_pipeline/flows/calculate_properties.py: 0%
70 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
"""
Workflow for calculating shape properties.

Working location structure:

.. code-block:: bash

    (name)
    ├── data
    │   └── data.LOCATIONS
    │       └── (name)_(key)_(seed).LOCATIONS.tar.xz
    └── calculations
        └── calculations.PROPERTIES
            ├── (name)_(key)_(seed)_(tick).PROPERTIES.csv
            └── (name)_(key)_(seed)_(tick)_(region).PROPERTIES.csv

Data from **data.LOCATIONS** are used to calculate properties, which are saved
to **calculations.PROPERTIES**.

If region is specified, the region is included in the output key. For
calculations with offset but no chunking, the output key extension starts with
``.(offset).`` to specify the index offset. For calculations with chunking, the
output key extension starts with ``.(offset).(chunk).`` to specify the index
offset and chunk size.
"""
27from dataclasses import dataclass, field
28from typing import Optional
30import pandas as pd
31from abm_shape_collection import get_shape_properties, make_voxels_array
32from arcade_collection.output import extract_tick_json, get_location_voxels
33from io_collection.keys import make_key
34from io_collection.load import load_tar
35from io_collection.save import save_dataframe
36from prefect import flow
# Default shape properties calculated by the flow when no explicit list is
# given in the parameters configuration.
SHAPE_PROPERTIES = [
    "area",
    "axis_major_length",
    "axis_minor_length",
    "eccentricity",
    "orientation",
    "perimeter",
    "extent",
    "solidity",
]
@dataclass
class ParametersConfig:
    """Parameter configuration for calculate properties flow."""

    key: str
    """Simulation key to calculate."""

    seed: int
    """Simulation random seed to calculate."""

    tick: int
    """Simulation tick to calculate."""

    offset: int = 0
    """Index offset for skipped calculations."""

    chunk: Optional[int] = None
    """Number of indices to calculate, starting from offset."""

    region: Optional[str] = None
    """Subcellular region to calculate."""

    # Hand each instance its own copy of the default list. Returning the
    # module-level SHAPE_PROPERTIES object itself would let a mutation on one
    # instance leak into the constant and into every other instance.
    properties: list[str] = field(default_factory=lambda: list(SHAPE_PROPERTIES))
    """List of shape properties to calculate."""
@dataclass
class ContextConfig:
    """Execution context for the calculate properties flow."""

    working_location: str
    """Local path or S3 bucket that holds the input and output files."""
@dataclass
class SeriesConfig:
    """Simulation series settings for the calculate properties flow."""

    name: str
    """Simulation series name."""
@flow(name="calculate-properties")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main calculate properties flow.

    Loads the locations archive for the selected key and seed, extracts the
    locations at the selected tick, calculates the requested shape properties
    for each selected location, and saves all results to one csv.
    """

    series_key = f"{series.name}_{parameters.key}_{parameters.seed:04d}"

    # Load the locations archive and pull out the entries for the tick.
    data_key = make_key(series.name, "data", "data.LOCATIONS")
    archive_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
    archive = load_tar(context.working_location, archive_key)
    locations_json = extract_tick_json(archive, series_key, parameters.tick, "LOCATIONS")

    rows = []
    remaining = parameters.chunk  # None means no limit on entries visited.

    for index, location in enumerate(locations_json):
        if index < parameters.offset:
            continue

        # Every entry at or past the offset uses up chunk budget, including
        # entries skipped below because they have no voxels.
        if remaining is not None:
            if remaining <= 0:
                break
            remaining -= 1

        voxels = get_location_voxels(location, parameters.region)

        if len(voxels) == 0:
            continue

        row = get_shape_properties(make_voxels_array(voxels), parameters.properties)
        row.update(
            {
                "KEY": parameters.key,
                "ID": location["id"],
                "SEED": parameters.seed,
                "TICK": parameters.tick,
            }
        )
        rows.append(row)

    # With chunking the offset is always part of the key (even when zero);
    # without chunking it is only included when non-zero.
    if parameters.chunk is not None:
        offset_key = f".{parameters.offset:04d}"
        chunk_key = f".{parameters.chunk:04d}"
    elif parameters.offset > 0:
        offset_key = f".{parameters.offset:04d}"
        chunk_key = ""
    else:
        offset_key = ""
        chunk_key = ""

    region_key = "" if parameters.region is None else f"_{parameters.region}"
    suffix = f"{region_key}{offset_key}{chunk_key}"

    calc_key = make_key(series.name, "calculations", "calculations.PROPERTIES")
    props_key = make_key(calc_key, f"{series_key}_{parameters.tick:06d}{suffix}.PROPERTIES.csv")
    save_dataframe(context.working_location, props_key, pd.DataFrame(rows), index=False)