Coverage for src/cell_abm_pipeline/flows/calculate_coefficients.py: 0%
78 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1"""
Workflow for calculating spherical harmonic coefficients.

Working location structure:

.. code-block:: bash

    (name)
    ├── data
    │   └── data.LOCATIONS
    │       └── (name)_(key)_(seed).LOCATIONS.tar.xz
    └── calculations
        └── calculations.COEFFICIENTS
            ├── (name)_(key)_(seed)_(tick).COEFFICIENTS.csv
            └── (name)_(key)_(seed)_(tick)_(region).COEFFICIENTS.csv

Data from **data.LOCATIONS** are used to calculate coefficients, which are saved
to **calculations.COEFFICIENTS**.

If region is specified, the region is included in the output key. For
calculations with offset but no chunking, the output key extension starts with
``.(offset).`` to specify the index offset. For calculations with chunking, the
output key extension starts with ``.(offset).(chunk).`` to specify the index
offset and chunk size.
25"""
27from dataclasses import dataclass
28from typing import Optional
30import pandas as pd
31from abm_shape_collection import get_shape_coefficients, make_voxels_array
32from arcade_collection.output import extract_tick_json, get_location_voxels
33from io_collection.keys import make_key
34from io_collection.load import load_tar
35from io_collection.save import save_dataframe
36from prefect import flow
# Default order of the spherical harmonics parametrization; used as the default
# value of ``ParametersConfig.order``.
COEFFICIENT_ORDER = 16


@dataclass
class ParametersConfig:
    """Parameters accepted by the calculate coefficients flow."""

    key: str
    """Key of the simulation to calculate coefficients for."""

    seed: int
    """Random seed of the simulation to calculate coefficients for."""

    tick: int
    """Tick of the simulation to calculate coefficients for."""

    offset: int = 0
    """Number of leading indices to skip before calculating."""

    chunk: Optional[int] = None
    """Count of indices to calculate, counted from the offset (no limit if None)."""

    region: Optional[str] = None
    """Name of the subcellular region to calculate (none selected if None)."""

    scale: int = 1
    """Factor by which the image array is rescaled."""

    order: int = COEFFICIENT_ORDER
    """Order used for the spherical harmonics coefficient parametrization."""
@dataclass
class ContextConfig:
    """Context settings accepted by the calculate coefficients flow."""

    working_location: str
    """Local path or S3 bucket where input and output files are located."""
@dataclass
class SeriesConfig:
    """Series settings accepted by the calculate coefficients flow."""

    name: str
    """Simulation series name."""
@flow(name="calculate-coefficients")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Calculate shape coefficients for one simulation tick and save them as a CSV.

    The locations archive for the given series, key, and seed is loaded from
    the working location, and the locations for the selected tick are
    extracted from it. Each visited location is converted to a voxel array and
    passed to ``get_shape_coefficients``. When a region is given, the region
    array is the first argument and the full array is the second; otherwise
    the full array is passed for both. Every result is tagged with ``KEY``,
    ``ID``, ``SEED``, and ``TICK`` before being collected into a dataframe.

    Locations with index below ``parameters.offset`` are skipped. When
    ``parameters.chunk`` is set, iteration stops once more than that many
    locations past the offset have been visited.

    The output key is
    ``(name)_(key)_(seed)_(tick)[_(region)][.(offset)][.(chunk)].COEFFICIENTS.csv``
    under ``calculations.COEFFICIENTS``. The offset part is present when the
    offset is positive or a chunk is set; the chunk part only when a chunk is
    set.
    """

    name = series.name
    data_key = make_key(name, "data", "data.LOCATIONS")
    calc_key = make_key(name, "calculations", "calculations.COEFFICIENTS")
    series_key = f"{name}_{parameters.key}_{parameters.seed:04d}"

    tar_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
    tar = load_tar(context.working_location, tar_key)
    locations = extract_tick_json(tar, series_key, parameters.tick, "LOCATIONS")

    rows = []
    visited = 0

    for index, location in enumerate(locations):
        if index < parameters.offset:
            continue

        # Counted before the empty-voxel checks below, so locations that end up
        # skipped still use up a slot of the chunk.
        visited += 1
        if parameters.chunk is not None and visited > parameters.chunk:
            break

        voxels = get_location_voxels(location)
        if len(voxels) == 0:
            continue

        reference = make_voxels_array(voxels, None, parameters.scale)

        if parameters.region is None:
            target = reference
        else:
            region_voxels = get_location_voxels(location, parameters.region)
            if len(region_voxels) == 0:
                continue
            target = make_voxels_array(region_voxels, None, parameters.scale)

        row = get_shape_coefficients(target, reference, parameters.order)

        # Identifying columns are assigned after the coefficients, in this order.
        tags = (
            ("KEY", parameters.key),
            ("ID", location["id"]),
            ("SEED", parameters.seed),
            ("TICK", parameters.tick),
        )
        for column, value in tags:
            row[column] = value

        rows.append(row)

    coeffs_dataframe = pd.DataFrame(rows)

    # A chunked calculation always records its offset, even when it is zero.
    include_offset = parameters.offset > 0 or parameters.chunk is not None
    offset_part = f".{parameters.offset:04d}" if include_offset else ""
    chunk_part = "" if parameters.chunk is None else f".{parameters.chunk:04d}"
    region_part = "" if parameters.region is None else f"_{parameters.region}"
    suffix = f"{region_part}{offset_part}{chunk_part}"

    file_name = f"{series_key}_{parameters.tick:06d}{suffix}.COEFFICIENTS.csv"
    coeffs_key = make_key(calc_key, file_name)
    save_dataframe(context.working_location, coeffs_key, coeffs_dataframe, index=False)