Coverage for src/cell_abm_pipeline/flows/convert_arcade_format.py: 0%
264 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1"""
2Workflow for converting ARCADE simulations to other formats.
4Working location structure:
6.. code-block:: bash
8 (name)
9 ├── converted
10 │ ├── converted.COLORIZER
11 │ │ └── (name)_(key)_(seed)
12 │ │ ├── feature_(index).json
13 │ │ ├── frame_(index).png
14 │ │ ├── manifest.json
15 │ │ ├── outliers.json
16 │ │ ├── times.json
17 │ │ └── tracks.json
18 │ ├── converted.IMAGE
19 │ │ └── (name)_(key)_(seed)_(chunk)_(chunk).IMAGE.ome.tiff
20 │ ├── converted.MESH
21 │ │ └── (name)_(key)_(seed)_(tick)_(id)_(region).MESH.obj
22 │ ├── converted.PROJECTION
23 │ │ └── (name)_(key)_(seed)_(tick)_(regions).PROJECTION.png
24 │ └── converted.SIMULARIUM
25 │ └── (name)_(key)_(seed).(suffix).simularium
26 ├── data
27 │ ├── data.CELLS
28 │ │ └── (name)_(key)_(seed).CELLS.tar.xz
29 │ └── data.LOCATIONS
30 │ └── (name)_(key)_(seed).LOCATIONS.tar.xz
31 └── results
32 └── (name)_(key)_(seed).csv
34Different formats use inputs from **results**, **data.CELLS**, and
35**data.LOCATIONS**. Formatted data are saved to **converted**.
36"""
38from dataclasses import dataclass, field
40import numpy as np
41from arcade_collection.convert import (
42 convert_to_colorizer,
43 convert_to_images,
44 convert_to_meshes,
45 convert_to_projection,
46 convert_to_simularium_objects,
47 convert_to_simularium_shapes,
48)
49from io_collection.keys import make_key
50from io_collection.load import load_dataframe, load_tar
51from io_collection.save import save_figure, save_image, save_json, save_text
52from prefect import flow
54from cell_abm_pipeline.flows.plot_basic_metrics import PHASE_COLORS
55from cell_abm_pipeline.flows.plot_cell_shapes import REGION_COLORS
# Names of every conversion format the main flow knows how to dispatch; each
# name gates one subflow in ``run_flow``.
FORMATS: list[str] = [
    "colorizer",
    "images",
    "meshes",
    "projections",
    "simularium_shapes",
    "simularium_objects",
]

# Feature names exported by the colorizer conversion when none are configured.
COLORIZER_FEATURES: list[str] = [
    "volume",
    "height",
]
@dataclass
class ParametersConfigColorizer:
    """
    Parameter configuration for convert ARCADE format flow - colorizer.

    All list-valued defaults are created fresh for each instance, so modifying
    one configuration never changes another configuration or the module-level
    defaults.
    """

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to colorizer."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to colorizer."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    chunk_size: int = 500
    """Image chunk size."""

    # Copy the module-level list: returning COLORIZER_FEATURES itself would make
    # every instance share (and be able to mutate) the same global default.
    features: list[str] = field(default_factory=lambda: list(COLORIZER_FEATURES))
    """List of colorizer features."""
@dataclass
class ParametersConfigImages:
    """
    Parameter configuration for convert ARCADE format flow - images.

    Consumed by the images subflow, which forwards these values to the image
    conversion and saves each resulting chunk as an OME-TIFF.
    """

    seeds: list[int] = field(default_factory=lambda: [0])
    """Seeds of the simulations that are converted to images."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification of the simulation ticks that are converted to images."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """Subcellular regions to include."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Bounding box size."""

    chunk_size: int = 500
    """Size of each image chunk."""

    binary: bool = False
    """Generate binary images if True; otherwise generate non-binary images."""

    separate: bool = False
    """Generate one image per tick if True; otherwise do not separate by tick."""
@dataclass
class ParametersConfigMeshes:
    """
    Parameter configuration for convert ARCADE format flow - meshes.

    Consumed by the meshes subflow, which forwards these values to the mesh
    conversion and saves each resulting mesh as an OBJ text file.
    """

    seeds: list[int] = field(default_factory=lambda: [0])
    """Seeds of the simulations that are converted to meshes."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification of the simulation ticks that are converted to meshes."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """Subcellular regions to include."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Bounding box size."""

    invert: bool = False
    """Produce meshes with inverted faces if True; otherwise leave faces as-is."""
@dataclass
class ParametersConfigProjections:
    """
    Parameter configuration for convert ARCADE format flow - projections.

    All list- and dict-valued defaults are created fresh for each instance, so
    modifying one configuration never changes another configuration or the
    shared color defaults.
    """

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to projections."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """
    Specification for simulation ticks to use for converting to projections.

    The projections subflow unpacks this into ``np.arange``, i.e. it is treated
    as ``(start, stop, step)``.
    """

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    scale: int = 100
    """Size of scale bar (in um)."""

    # Copy the imported mapping: returning REGION_COLORS itself would make every
    # instance share (and be able to mutate) the same global default.
    region_colors: dict[str, str] = field(default_factory=lambda: dict(REGION_COLORS))
    """Colors for each cell region."""
@dataclass
class ParametersConfigSimulariumShapes:
    """
    Parameter configuration for convert ARCADE format flow - simularium shapes.

    All list- and dict-valued defaults are created fresh for each instance, so
    modifying one configuration never changes another configuration or the
    shared color defaults.
    """

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to simularium."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to simularium."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    # Copy the imported mapping: returning PHASE_COLORS itself would make every
    # instance share (and be able to mutate) the same global default.
    phase_colors: dict[str, str] = field(default_factory=lambda: dict(PHASE_COLORS))
    """Colors for each cell cycle phase."""

    resolution: int = 0
    """Number of voxels represented by a sphere (0 for single sphere per cell)."""
@dataclass
class ParametersConfigSimulariumObjects:
    """
    Parameter configuration for convert ARCADE format flow - simularium objects.

    All list- and dict-valued defaults are created fresh for each instance, so
    modifying one configuration never changes another configuration or the
    shared color defaults.
    """

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to simularium."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to simularium."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    # Copy the imported mapping: returning PHASE_COLORS itself would make every
    # instance share (and be able to mutate) the same global default.
    phase_colors: dict[str, str] = field(default_factory=lambda: dict(PHASE_COLORS))
    """Colors for each cell cycle phase."""

    url: str = ""
    """URL for object files."""

    group_size: int = 1
    """Mesh group size."""
@dataclass
class ParametersConfig:
    """
    Parameter configuration for convert ARCADE format flow.

    Every default is produced by a factory. Dataclass instances without
    ``frozen=True`` are unhashable, and Python 3.11+ rejects unhashable objects
    as plain field defaults (``ValueError`` at class creation); on older
    versions a plain instance default would be shared by every
    ``ParametersConfig``. Using ``default_factory`` gives each configuration its
    own independent sub-configurations on all versions.
    """

    # Copy the module-level list so instances cannot mutate the global default.
    formats: list[str] = field(default_factory=lambda: list(FORMATS))
    """List of convert formats."""

    colorizer: ParametersConfigColorizer = field(default_factory=ParametersConfigColorizer)
    """Parameters for colorizer subflow."""

    images: ParametersConfigImages = field(default_factory=ParametersConfigImages)
    """Parameters for images subflow."""

    meshes: ParametersConfigMeshes = field(default_factory=ParametersConfigMeshes)
    """Parameters for meshes subflow."""

    projections: ParametersConfigProjections = field(default_factory=ParametersConfigProjections)
    """Parameters for projections subflow."""

    simularium_shapes: ParametersConfigSimulariumShapes = field(
        default_factory=ParametersConfigSimulariumShapes
    )
    """Parameters for simularium shapes subflow."""

    simularium_objects: ParametersConfigSimulariumObjects = field(
        default_factory=ParametersConfigSimulariumObjects
    )
    """Parameters for simularium objects subflow."""
@dataclass
class ContextConfig:
    """
    Context configuration for convert ARCADE format flow.

    Identifies where the flow reads its inputs and writes its converted outputs.
    """

    working_location: str
    """Local path or S3 bucket used for both input and output files."""
@dataclass
class SeriesConfig:
    """
    Series configuration for convert ARCADE format flow.

    Describes which simulation series, and which of its conditions, to convert.
    """

    name: str
    """Simulation series name."""

    conditions: list[dict]
    """Series condition dictionaries; each must include a unique condition "key"."""
@flow(name="convert-arcade-format")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main convert ARCADE format flow.

    Runs one subflow per format listed in ``parameters.formats``, always in the
    fixed order below regardless of the order of the list:

    - :py:func:`run_flow_convert_to_colorizer`
    - :py:func:`run_flow_convert_to_images`
    - :py:func:`run_flow_convert_to_meshes`
    - :py:func:`run_flow_convert_to_projections`
    - :py:func:`run_flow_convert_to_simularium_shapes`
    - :py:func:`run_flow_convert_to_simularium_objects`

    Format names that match none of the subflows are ignored.
    """

    # Ordered (format name, subflow) pairs. The attribute of ``parameters`` that
    # holds a subflow's configuration has the same name as its format.
    subflows = (
        ("colorizer", run_flow_convert_to_colorizer),
        ("images", run_flow_convert_to_images),
        ("meshes", run_flow_convert_to_meshes),
        ("projections", run_flow_convert_to_projections),
        ("simularium_shapes", run_flow_convert_to_simularium_shapes),
        ("simularium_objects", run_flow_convert_to_simularium_objects),
    )

    for format_name, subflow in subflows:
        if format_name not in parameters.formats:
            continue

        subflow(context, series, getattr(parameters, format_name))
@flow(name="convert-arcade-format_convert-to-colorizer")
def run_flow_convert_to_colorizer(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigColorizer
) -> None:
    """
    Convert ARCADE format subflow for colorizer.

    For every condition key and seed, loads the **data.LOCATIONS** archive and
    the **results** table, then writes into
    **converted.COLORIZER/(name)_(key)_(seed)**:

    - ``frame_(index).png`` for each image produced by the image conversion
    - ``manifest.json``, ``outliers.json``, ``tracks.json``, ``times.json``
    - ``feature_(index).json`` for each configured feature, indexed by its
      position in ``parameters.features``
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.COLORIZER")
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(
                context.working_location,
                make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
            )

            # Frames are requested as non-binary, separate, flattened images.
            frames = convert_to_images(
                series_key,
                tar,
                parameters.frame_spec,
                parameters.regions,
                parameters.box,
                parameters.chunk_size,
                binary=False,
                separate=True,
                flatten=True,
            )

            for frame_index, frame_entry in enumerate(frames):
                # Only the third element (the image data) of each entry is used.
                _, _, image, _ = frame_entry
                frame_key = make_key(output_key, series_key, f"frame_{frame_index}.png")
                save_image(context.working_location, frame_key, image)

            results = load_dataframe(
                context.working_location,
                make_key(series.name, "results", f"{series_key}.csv"),
            )

            colorizer = convert_to_colorizer(
                results,
                parameters.features,
                parameters.frame_spec,
                parameters.ds,
                parameters.dt,
                parameters.regions,
            )

            # Fixed metadata files, each stored under the same name as its entry.
            for entry in ("manifest", "outliers", "tracks", "times"):
                entry_key = make_key(output_key, series_key, f"{entry}.json")
                save_json(context.working_location, entry_key, colorizer[entry])

            # Feature files are numbered by position, but looked up by feature name.
            for feature_index, feature in enumerate(parameters.features):
                feature_key = make_key(output_key, series_key, f"feature_{feature_index}.json")
                save_json(context.working_location, feature_key, colorizer[feature])
@flow(name="convert-arcade-format_convert-to-images")
def run_flow_convert_to_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigImages
) -> None:
    """
    Convert ARCADE format subflow for images.

    For every condition key and seed, loads the **data.LOCATIONS** archive,
    converts it to image chunks, and saves each chunk to **converted.IMAGE** as
    ``(name)_(key)_(seed)_(chunk)_(chunk).IMAGE.ome.tiff``. When the conversion
    reports a frame for a chunk, the zero-padded frame is inserted before the
    chunk indices.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.IMAGE")
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(
                context.working_location,
                make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
            )

            image_chunks = convert_to_images(
                series_key,
                tar,
                parameters.frame_spec,
                parameters.regions,
                parameters.box,
                parameters.chunk_size,
                binary=parameters.binary,
                separate=parameters.separate,
                flatten=False,
            )

            for chunk_i, chunk_j, image, frame in image_chunks:
                chunk_name = f"{chunk_i:02d}_{chunk_j:02d}.IMAGE.ome.tiff"

                # A frame of None means the chunk is not tied to a single frame,
                # so the file name carries no frame component.
                prefix = series_key if frame is None else f"{series_key}_{frame:06d}"

                image_key = make_key(output_key, f"{prefix}_{chunk_name}")
                save_image(context.working_location, image_key, image)
@flow(name="convert-arcade-format_convert-to-meshes")
def run_flow_convert_to_meshes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigMeshes
) -> None:
    """
    Convert ARCADE format subflow for meshes.

    For every condition key and seed, loads the **data.LOCATIONS** archive,
    converts it to meshes, and saves each mesh as text to **converted.MESH** as
    ``(name)_(key)_(seed)_(tick)_(id)_(region).MESH.obj``.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.MESH")
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(
                context.working_location,
                make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
            )

            converted = convert_to_meshes(
                series_key,
                tar,
                parameters.frame_spec,
                parameters.regions,
                parameters.box,
                parameters.invert,
            )

            for frame, cell_id, region, mesh in converted:
                file_name = f"{series_key}_{frame:06d}_{cell_id:06d}_{region}.MESH.obj"
                save_text(context.working_location, make_key(output_key, file_name), mesh)
@flow(name="convert-arcade-format_convert-to-projections")
def run_flow_convert_to_projections(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigProjections
) -> None:
    """
    Convert ARCADE format subflow for projections.

    For every condition key and seed, loads the **data.LOCATIONS** archive and,
    for each frame in ``np.arange(*parameters.frame_spec)``, converts it to a
    projection figure saved to **converted.PROJECTION** as
    ``(name)_(key)_(seed)_(tick)_(regions).PROJECTION.png``, where ``(regions)``
    is the sorted region names joined by underscores.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.PROJECTION")
    # Sorting makes the file name independent of the order regions were listed in.
    region_key = "_".join(sorted(parameters.regions))
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(
                context.working_location,
                make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
            )

            for frame in np.arange(*parameters.frame_spec):
                figure = convert_to_projection(
                    series_key,
                    tar,
                    frame,
                    parameters.regions,
                    parameters.box,
                    parameters.ds,
                    parameters.dt,
                    parameters.scale,
                    parameters.region_colors,
                )

                file_name = f"{series_key}_{frame:06d}_{region_key}.PROJECTION.png"
                save_figure(
                    context.working_location,
                    make_key(output_key, file_name),
                    figure,
                    bbox_inches="tight",
                )
@flow(name="convert-arcade-format_convert-to-simularium-shapes")
def run_flow_convert_to_simularium_shapes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigSimulariumShapes
) -> None:
    """
    Convert ARCADE format subflow for Simularium with shapes.

    For every condition key and seed, loads the **data.CELLS** and
    **data.LOCATIONS** archives, converts them to Simularium text, and saves it
    to **converted.SIMULARIUM** as
    ``(name)_(key)_(seed).SHAPES(resolution).simularium``.
    """

    cells_key = make_key(series.name, "data", "data.CELLS")
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.SIMULARIUM")
    condition_keys = [condition["key"] for condition in series.conditions]

    # The resolution is part of the file name so that different resolutions of
    # the same simulation do not overwrite each other.
    suffix = f"SHAPES{parameters.resolution}"

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            cells_tar = load_tar(
                context.working_location,
                make_key(cells_key, f"{series_key}.CELLS.tar.xz"),
            )
            locations_tar = load_tar(
                context.working_location,
                make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
            )

            # NOTE(review): ``ds`` is passed for two consecutive positional
            # arguments - presumably in-plane and z scaling; confirm against the
            # convert_to_simularium_shapes signature.
            simularium = convert_to_simularium_shapes(
                series_key,
                "potts",
                {"cells": cells_tar, "locations": locations_tar},
                parameters.frame_spec,
                parameters.box,
                parameters.ds,
                parameters.ds,
                parameters.dt,
                parameters.phase_colors,
                parameters.resolution,
            )

            file_name = f"{series_key}.{suffix}.simularium"
            save_text(context.working_location, make_key(output_key, file_name), simularium)
@flow(name="convert-arcade-format_convert-to-simularium-objects")
def run_flow_convert_to_simularium_objects(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigSimulariumObjects
) -> None:
    """
    Convert ARCADE format subflow for Simularium with objects.

    For every condition key and seed, loads the **results** table and the
    **data.LOCATIONS** archive, then writes into **converted.SIMULARIUM**:

    - ``(name)_(key)_(seed).OBJECTS(group_size)/(tick)_(region)_(index).MESH.obj``
      for each mesh produced by the mesh conversion
    - ``(name)_(key)_(seed).OBJECTS(group_size).simularium``

    The regions are fixed to ``DEFAULT`` and ``NUCLEUS``, with only ``DEFAULT``
    flagged for inversion. The Simularium conversion is given the mesh directory
    key combined with ``parameters.url``.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.SIMULARIUM")
    condition_keys = [condition["key"] for condition in series.conditions]

    suffix = f"OBJECTS{parameters.group_size}"
    regions = ["DEFAULT", "NUCLEUS"]
    invert = {"DEFAULT": True, "NUCLEUS": False}

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            results = load_dataframe(
                context.working_location,
                make_key(series.name, "results", f"{series_key}.csv"),
            )
            tar = load_tar(
                context.working_location,
                make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
            )

            # Per-cell category table: tick becomes FRAME and phase becomes
            # CATEGORY, while ID is kept under its original name.
            column_names = {"TICK": "FRAME", "PHASE": "CATEGORY"}
            categories = results[["TICK", "PHASE", "ID"]].rename(columns=column_names)

            grouped_meshes = convert_to_meshes(
                series_key,
                tar,
                parameters.frame_spec,
                regions,
                parameters.box,
                invert,
                parameters.group_size,
                categories,
            )

            # All meshes for this simulation live in one directory-like key.
            mesh_path_key = make_key(output_key, f"{series_key}.{suffix}")

            for frame, index, region, mesh in grouped_meshes:
                file_name = f"{frame:06d}_{region}_{index:03d}.MESH.obj"
                save_text(context.working_location, make_key(mesh_path_key, file_name), mesh)

            # NOTE(review): ``ds`` is passed for two consecutive positional
            # arguments - presumably in-plane and z scaling; confirm against the
            # convert_to_simularium_objects signature.
            simularium = convert_to_simularium_objects(
                series_key,
                "potts",
                categories,
                parameters.frame_spec,
                regions,
                parameters.box,
                parameters.ds,
                parameters.ds,
                parameters.dt,
                parameters.phase_colors,
                parameters.group_size,
                make_key(parameters.url, mesh_path_key),
            )

            file_name = f"{series_key}.{suffix}.simularium"
            save_text(context.working_location, make_key(output_key, file_name), simularium)