Coverage for src/cell_abm_pipeline/flows/calculate_positions.py: 0%
40 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1"""
Workflow for calculating voxel positions.

Working location structure:

.. code-block:: bash

    (name)
    ├── data
    │   └── data.LOCATIONS
    │       └── (name)_(key)_(seed).LOCATIONS.tar.xz
    └── calculations
        └── calculations.POSITIONS
            └── (name)_(key)_(seed)_(tick).POSITIONS.csv

Data from **data.LOCATIONS** are used to calculate positions, which are saved to
**calculations.POSITIONS**.
18"""
20from dataclasses import dataclass
22import numpy as np
23import pandas as pd
24from arcade_collection.output import extract_tick_json, get_location_voxels
25from io_collection.keys import make_key
26from io_collection.load import load_tar
27from io_collection.save import save_dataframe
28from prefect import flow
@dataclass
class ParametersConfig:
    """Parameters selecting which simulation output the flow processes."""

    key: str
    """Key of the simulation to calculate positions for."""

    seed: int
    """Random seed of the simulation to calculate positions for."""

    tick: int
    """Tick of the simulation to calculate positions for."""
@dataclass
class ContextConfig:
    """Context (where files live) for the calculate positions flow."""

    working_location: str
    """Local path or S3 bucket holding the input and output files."""
@dataclass
class SeriesConfig:
    """Series (which simulation set) for the calculate positions flow."""

    name: str
    """Simulation series name."""
@flow(name="calculate-positions")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main calculate positions flow.

    Loads the ``LOCATIONS`` archive for the selected series, key, and seed from
    the working location, extracts the locations at the requested tick, and
    collects the ``(x, y)`` coordinates of every voxel together with the id of
    the location it belongs to. Voxels are then grouped by ``(x, y)`` so that
    each row lists the unique ids found at that position. The result is tagged
    with ``KEY``, ``SEED``, and ``TICK`` columns and saved as a CSV (without the
    index) under **calculations.POSITIONS**.
    """

    data_key = make_key(series.name, "data", "data.LOCATIONS")
    calc_key = make_key(series.name, "calculations", "calculations.POSITIONS")
    series_key = f"{series.name}_{parameters.key}_{parameters.seed:04d}"

    locations_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
    locations_tar = load_tar(context.working_location, locations_key)
    locations_json = extract_tick_json(locations_tar, series_key, parameters.tick, "LOCATIONS")

    # Only the first two voxel coordinates are kept; the third is discarded.
    positions = [
        [x, y, location["id"]]
        for location in locations_json
        for x, y, _ in get_location_voxels.fn(location)
    ]
    positions_dataframe = pd.DataFrame(positions, columns=["x", "y", "ids"])

    # Use ``tolist()`` so each cell holds built-in Python scalars rather than
    # NumPy scalars. With NumPy >= 2.0 a list of NumPy scalars is rendered as
    # "[np.int64(1), ...]" when written to CSV; built-in ints always render as
    # "[1, ...]".
    positions_unique = (
        positions_dataframe.groupby(["x", "y"])["ids"]
        .apply(lambda ids: np.unique(ids).tolist())
        .reset_index()
    )

    positions_unique["KEY"] = parameters.key
    positions_unique["SEED"] = parameters.seed
    positions_unique["TICK"] = parameters.tick

    positions_key = make_key(calc_key, f"{series_key}_{parameters.tick:06d}.POSITIONS.csv")
    save_dataframe(context.working_location, positions_key, positions_unique, index=False)