Source code for cell_abm_pipeline.flows.convert_arcade_format

"""
Workflow for converting ARCADE simulations to other formats.

Working location structure:

.. code-block:: bash

    (name)
    ├── converted
    │   ├── converted.COLORIZER
    │   │   └── (name)_(key)_(seed)
    │   │       ├── feature_(index).json
    │   │       ├── frame_(index).png
    │   │       ├── manifest.json
    │   │       ├── outliers.json
    │   │       ├── times.json
    │   │       └── tracks.json
    │   ├── converted.IMAGE
    │   │   └── (name)_(key)_(seed)_(chunk)_(chunk).IMAGE.ome.tiff
    │   ├── converted.MESH
    │   │   └── (name)_(key)_(seed)_(tick)_(id)_(region).MESH.obj
    │   ├── converted.PROJECTION
    │   │   └── (name)_(key)_(seed)_(tick)_(regions).PROJECTION.png
    │   └── converted.SIMULARIUM
    │       ├── (name)_(key)_(seed).(suffix)
    │       │   └── (tick)_(region)_(index).MESH.obj
    │       └── (name)_(key)_(seed).(suffix).simularium
    ├── data
    │   ├── data.CELLS
    │   │   └── (name)_(key)_(seed).CELLS.tar.xz
    │   └── data.LOCATIONS
    │       └── (name)_(key)_(seed).LOCATIONS.tar.xz
    └── results
        └── (name)_(key)_(seed).csv

Different formats use inputs from **results**, **data.CELLS**, and
**data.LOCATIONS**. Formatted data are saved to **converted**.
"""

from dataclasses import dataclass, field

import numpy as np
from arcade_collection.convert import (
    convert_to_colorizer,
    convert_to_images,
    convert_to_meshes,
    convert_to_projection,
    convert_to_simularium_objects,
    convert_to_simularium_shapes,
)
from io_collection.keys import make_key
from io_collection.load import load_dataframe, load_tar
from io_collection.save import save_figure, save_image, save_json, save_text
from prefect import flow

from cell_abm_pipeline.flows.plot_basic_metrics import PHASE_COLORS
from cell_abm_pipeline.flows.plot_cell_shapes import REGION_COLORS

# Names of the conversion formats checked by the main flow; each name selects
# one subflow. Also used as the default for ``ParametersConfig.formats``.
FORMATS: list[str] = [
    "colorizer",
    "images",
    "meshes",
    "projections",
    "simularium_shapes",
    "simularium_objects",
]

# Default feature names used by the colorizer parameter configuration.
COLORIZER_FEATURES: list[str] = [
    "volume",
    "height",
]


@dataclass
class ParametersConfigColorizer:
    """Parameter configuration for convert ARCADE format flow - colorizer."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to colorizer."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to colorizer."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    chunk_size: int = 500
    """Image chunk size."""

    # Copy the module-level default so that mutating one instance's feature
    # list never changes COLORIZER_FEATURES or the defaults of other instances.
    features: list[str] = field(default_factory=lambda: list(COLORIZER_FEATURES))
    """List of colorizer features."""
@dataclass
class ParametersConfigImages:
    """Settings for the images subflow of the convert ARCADE format flow."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Seeds of the simulations that are converted to images."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification of the simulation ticks that are converted to images."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """Subcellular regions to include."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Bounding box size."""

    chunk_size: int = 500
    """Size of each image chunk."""

    binary: bool = False
    """Generate binary images if True, otherwise not."""

    separate: bool = False
    """Generate a separate image for each tick if True, otherwise not."""
@dataclass
class ParametersConfigMeshes:
    """Settings for the meshes subflow of the convert ARCADE format flow."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Seeds of the simulations that are converted to meshes."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification of the simulation ticks that are converted to meshes."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """Subcellular regions to include."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Bounding box size."""

    invert: bool = False
    """Generate meshes with inverted faces if True, otherwise not."""
@dataclass
class ParametersConfigProjections:
    """Parameter configuration for convert ARCADE format flow - projections."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to projections."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to projections."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    scale: int = 100
    """Size of scale bar (in um)."""

    # Copy the shared mapping so that editing one instance's colors never
    # changes REGION_COLORS or the defaults of other instances.
    region_colors: dict[str, str] = field(default_factory=lambda: dict(REGION_COLORS))
    """Colors for each cell region."""
@dataclass
class ParametersConfigSimulariumShapes:
    """Parameter configuration for convert ARCADE format flow - simularium shapes."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to simularium."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to simularium."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    # Copy the shared mapping so that editing one instance's colors never
    # changes PHASE_COLORS or the defaults of other instances.
    phase_colors: dict[str, str] = field(default_factory=lambda: dict(PHASE_COLORS))
    """Colors for each cell cycle phase."""

    resolution: int = 0
    """Number of voxels represented by a sphere (0 for single sphere per cell)."""
@dataclass
class ParametersConfigSimulariumObjects:
    """Parameter configuration for convert ARCADE format flow - simularium objects."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to simularium."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to simularium."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    # Copy the shared mapping so that editing one instance's colors never
    # changes PHASE_COLORS or the defaults of other instances.
    phase_colors: dict[str, str] = field(default_factory=lambda: dict(PHASE_COLORS))
    """Colors for each cell cycle phase."""

    url: str = ""
    """URL for object files."""

    group_size: int = 1
    """Mesh group size."""
@dataclass
class ParametersConfig:
    """
    Parameter configuration for convert ARCADE format flow.

    Every default is produced by a ``default_factory``. The nested parameter
    dataclasses are unhashable, so using an instance directly as a field
    default raises ``ValueError`` on Python 3.11+ and, on earlier versions,
    shares a single mutable instance between every ``ParametersConfig``.
    """

    # Copy the module-level list so mutating one configuration's formats never
    # changes FORMATS itself.
    formats: list[str] = field(default_factory=lambda: list(FORMATS))
    """List of convert formats."""

    colorizer: ParametersConfigColorizer = field(default_factory=ParametersConfigColorizer)
    """Parameters for colorizer subflow."""

    images: ParametersConfigImages = field(default_factory=ParametersConfigImages)
    """Parameters for images subflow."""

    meshes: ParametersConfigMeshes = field(default_factory=ParametersConfigMeshes)
    """Parameters for meshes subflow."""

    projections: ParametersConfigProjections = field(default_factory=ParametersConfigProjections)
    """Parameters for projections subflow."""

    simularium_shapes: ParametersConfigSimulariumShapes = field(
        default_factory=ParametersConfigSimulariumShapes
    )
    """Parameters for simularium shapes subflow."""

    simularium_objects: ParametersConfigSimulariumObjects = field(
        default_factory=ParametersConfigSimulariumObjects
    )
    """Parameters for simularium objects subflow."""
@dataclass
class ContextConfig:
    """Context settings for the convert ARCADE format flow."""

    working_location: str
    """Where input and output files live (local path or S3 bucket)."""
@dataclass
class SeriesConfig:
    """Series settings for the convert ARCADE format flow."""

    name: str
    """Simulation series name."""

    conditions: list[dict]
    """Series condition dictionaries (each must include a unique condition "key")."""
@flow(name="convert-arcade-format")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main convert ARCADE format flow.

    Runs each of the following subflows, in this order, when its format name
    is listed in the parameters:

    - :py:func:`run_flow_convert_to_colorizer`
    - :py:func:`run_flow_convert_to_images`
    - :py:func:`run_flow_convert_to_meshes`
    - :py:func:`run_flow_convert_to_projections`
    - :py:func:`run_flow_convert_to_simularium_shapes`
    - :py:func:`run_flow_convert_to_simularium_objects`
    """

    # Format name -> subflow; the format name is also the attribute on
    # ``parameters`` that holds that subflow's parameters.
    subflows = (
        ("colorizer", run_flow_convert_to_colorizer),
        ("images", run_flow_convert_to_images),
        ("meshes", run_flow_convert_to_meshes),
        ("projections", run_flow_convert_to_projections),
        ("simularium_shapes", run_flow_convert_to_simularium_shapes),
        ("simularium_objects", run_flow_convert_to_simularium_objects),
    )

    for format_name, subflow in subflows:
        if format_name in parameters.formats:
            subflow(context, series, getattr(parameters, format_name))
@flow(name="convert-arcade-format_convert-to-colorizer")
def run_flow_convert_to_colorizer(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigColorizer
) -> None:
    """
    Convert ARCADE format subflow for colorizer.

    For every condition key and seed, the locations archive is converted to
    per-frame images saved as ``frame_(index).png``, and the results table is
    converted to colorizer data saved as manifest, outliers, tracks, times,
    and one ``feature_(index).json`` per feature.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.COLORIZER")
    condition_keys = [condition["key"] for condition in series.conditions]

    # Lazily enumerate condition/seed combinations, conditions outermost.
    series_keys = (
        f"{series.name}_{condition_key}_{seed:04d}"
        for condition_key in condition_keys
        for seed in parameters.seeds
    )

    for series_key in series_keys:
        locations_tar = load_tar(
            context.working_location,
            make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
        )

        frames = convert_to_images(
            series_key,
            locations_tar,
            parameters.frame_spec,
            parameters.regions,
            parameters.box,
            parameters.chunk_size,
            binary=False,
            separate=True,
            flatten=True,
        )

        # Only the image (third entry) of each yielded tuple is used; output
        # files are numbered by position in the sequence.
        for frame_index, (_, _, image, _) in enumerate(frames):
            frame_key = make_key(output_key, series_key, f"frame_{frame_index}.png")
            save_image(context.working_location, frame_key, image)

        results = load_dataframe(
            context.working_location,
            make_key(series.name, "results", f"{series_key}.csv"),
        )

        colorizer = convert_to_colorizer(
            results,
            parameters.features,
            parameters.frame_spec,
            parameters.ds,
            parameters.dt,
            parameters.regions,
        )

        # Fixed documents, each saved as (name).json.
        for document in ("manifest", "outliers", "tracks", "times"):
            document_key = make_key(output_key, series_key, f"{document}.json")
            save_json(context.working_location, document_key, colorizer[document])

        # Feature documents are looked up by feature name but saved by index.
        for feature_index, feature in enumerate(parameters.features):
            feature_key = make_key(output_key, series_key, f"feature_{feature_index}.json")
            save_json(context.working_location, feature_key, colorizer[feature])
@flow(name="convert-arcade-format_convert-to-images")
def run_flow_convert_to_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigImages
) -> None:
    """
    Convert ARCADE format subflow for images.

    For every condition key and seed, the locations archive is converted to
    image chunks, each saved as an ``.IMAGE.ome.tiff`` file. The file name
    includes the frame only when the converter reports one for the chunk.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.IMAGE")
    condition_keys = [condition["key"] for condition in series.conditions]

    # Lazily enumerate condition/seed combinations, conditions outermost.
    series_keys = (
        f"{series.name}_{condition_key}_{seed:04d}"
        for condition_key in condition_keys
        for seed in parameters.seeds
    )

    for series_key in series_keys:
        locations_tar = load_tar(
            context.working_location,
            make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
        )

        images = convert_to_images(
            series_key,
            locations_tar,
            parameters.frame_spec,
            parameters.regions,
            parameters.box,
            parameters.chunk_size,
            binary=parameters.binary,
            separate=parameters.separate,
            flatten=False,
        )

        for i, j, image, frame in images:
            chunk_name = f"{i:02d}_{j:02d}.IMAGE.ome.tiff"
            file_name = (
                f"{series_key}_{chunk_name}"
                if frame is None
                else f"{series_key}_{frame:06d}_{chunk_name}"
            )
            save_image(context.working_location, make_key(output_key, file_name), image)
@flow(name="convert-arcade-format_convert-to-meshes")
def run_flow_convert_to_meshes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigMeshes
) -> None:
    """
    Convert ARCADE format subflow for meshes.

    For every condition key and seed, the locations archive is converted to
    meshes, each saved as text to a ``.MESH.obj`` file named by frame, cell
    id, and region.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.MESH")
    condition_keys = [condition["key"] for condition in series.conditions]

    # Lazily enumerate condition/seed combinations, conditions outermost.
    series_keys = (
        f"{series.name}_{condition_key}_{seed:04d}"
        for condition_key in condition_keys
        for seed in parameters.seeds
    )

    for series_key in series_keys:
        locations_tar = load_tar(
            context.working_location,
            make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
        )

        converted = convert_to_meshes(
            series_key,
            locations_tar,
            parameters.frame_spec,
            parameters.regions,
            parameters.box,
            parameters.invert,
        )

        for frame, cell_id, region, contents in converted:
            file_name = f"{series_key}_{frame:06d}_{cell_id:06d}_{region}.MESH.obj"
            save_text(context.working_location, make_key(output_key, file_name), contents)
@flow(name="convert-arcade-format_convert-to-projections")
def run_flow_convert_to_projections(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigProjections
) -> None:
    """
    Convert ARCADE format subflow for projections.

    For every condition key and seed, one projection figure is generated per
    frame in ``np.arange(*frame_spec)`` and saved as a ``.PROJECTION.png``
    file named by frame and the sorted, underscore-joined region names.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.PROJECTION")
    regions_label = "_".join(sorted(parameters.regions))
    condition_keys = [condition["key"] for condition in series.conditions]

    # Lazily enumerate condition/seed combinations, conditions outermost.
    series_keys = (
        f"{series.name}_{condition_key}_{seed:04d}"
        for condition_key in condition_keys
        for seed in parameters.seeds
    )

    for series_key in series_keys:
        locations_tar = load_tar(
            context.working_location,
            make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
        )

        for frame in np.arange(*parameters.frame_spec):
            figure = convert_to_projection(
                series_key,
                locations_tar,
                frame,
                parameters.regions,
                parameters.box,
                parameters.ds,
                parameters.dt,
                parameters.scale,
                parameters.region_colors,
            )

            file_name = f"{series_key}_{frame:06d}_{regions_label}.PROJECTION.png"
            save_figure(
                context.working_location,
                make_key(output_key, file_name),
                figure,
                bbox_inches="tight",
            )
@flow(name="convert-arcade-format_convert-to-simularium-shapes")
def run_flow_convert_to_simularium_shapes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigSimulariumShapes
) -> None:
    """
    Convert ARCADE format subflow for Simularium with shapes.

    For every condition key and seed, the cells and locations archives are
    converted together and the result is saved as text to a
    ``.SHAPES(resolution).simularium`` file.
    """

    cells_key = make_key(series.name, "data", "data.CELLS")
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.SIMULARIUM")
    condition_keys = [condition["key"] for condition in series.conditions]
    suffix = f"SHAPES{parameters.resolution}"

    # Lazily enumerate condition/seed combinations, conditions outermost.
    series_keys = (
        f"{series.name}_{condition_key}_{seed:04d}"
        for condition_key in condition_keys
        for seed in parameters.seeds
    )

    for series_key in series_keys:
        cells_tar = load_tar(
            context.working_location,
            make_key(cells_key, f"{series_key}.CELLS.tar.xz"),
        )
        locations_tar = load_tar(
            context.working_location,
            make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
        )
        tars = {"cells": cells_tar, "locations": locations_tar}

        # NOTE(review): ds is passed for two consecutive scale arguments,
        # presumably in-plane and z scaling; confirm against the converter.
        contents = convert_to_simularium_shapes(
            series_key,
            "potts",
            tars,
            parameters.frame_spec,
            parameters.box,
            parameters.ds,
            parameters.ds,
            parameters.dt,
            parameters.phase_colors,
            parameters.resolution,
        )

        file_key = make_key(output_key, f"{series_key}.{suffix}.simularium")
        save_text(context.working_location, file_key, contents)
@flow(name="convert-arcade-format_convert-to-simularium-objects")
def run_flow_convert_to_simularium_objects(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigSimulariumObjects
) -> None:
    """
    Convert ARCADE format subflow for Simularium with objects.

    For every condition key and seed, grouped meshes for the ``DEFAULT`` and
    ``NUCLEUS`` regions are saved as ``.MESH.obj`` files under a
    ``(series key).OBJECTS(group size)`` location, and a matching
    ``.simularium`` file is saved that is given the mesh location prefixed by
    the configured URL.
    """

    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.SIMULARIUM")
    condition_keys = [condition["key"] for condition in series.conditions]
    suffix = f"OBJECTS{parameters.group_size}"

    # Regions and per-region face inversion are fixed for this subflow.
    regions = ["DEFAULT", "NUCLEUS"]
    invert = {"DEFAULT": True, "NUCLEUS": False}

    # Lazily enumerate condition/seed combinations, conditions outermost.
    series_keys = (
        f"{series.name}_{condition_key}_{seed:04d}"
        for condition_key in condition_keys
        for seed in parameters.seeds
    )

    for series_key in series_keys:
        results = load_dataframe(
            context.working_location,
            make_key(series.name, "results", f"{series_key}.csv"),
        )
        locations_tar = load_tar(
            context.working_location,
            make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"),
        )

        # Per-cell phase at each tick, relabelled to the FRAME / CATEGORY
        # column names passed to the converters.
        column_names = {"TICK": "FRAME", "PHASE": "CATEGORY"}
        categories = results[["TICK", "PHASE", "ID"]].rename(columns=column_names)

        grouped_meshes = convert_to_meshes(
            series_key,
            locations_tar,
            parameters.frame_spec,
            regions,
            parameters.box,
            invert,
            parameters.group_size,
            categories,
        )

        mesh_location = make_key(output_key, f"{series_key}.{suffix}")

        for frame, index, region, contents in grouped_meshes:
            file_name = f"{frame:06d}_{region}_{index:03d}.MESH.obj"
            save_text(context.working_location, make_key(mesh_location, file_name), contents)

        # NOTE(review): ds is passed for two consecutive scale arguments,
        # presumably in-plane and z scaling; confirm against the converter.
        simularium = convert_to_simularium_objects(
            series_key,
            "potts",
            categories,
            parameters.frame_spec,
            regions,
            parameters.box,
            parameters.ds,
            parameters.ds,
            parameters.dt,
            parameters.phase_colors,
            parameters.group_size,
            make_key(parameters.url, mesh_location),
        )

        file_key = make_key(output_key, f"{series_key}.{suffix}.simularium")
        save_text(context.working_location, file_key, simularium)