Source code for cell_abm_pipeline.flows.initialize_arcade_simulations

"""
Workflow for initializing ARCADE simulations.

Working location structure:

.. code-block:: bash

    (name)
    ├── images
    │   └── (name)_(key).(extension)
    ├── inits
    │   └── inits.ARCADE
    │       ├── (name)_(key)_(margin)_(resolution).CELLS.json
    │       ├── (name)_(key)_(margin)_(resolution).LOCATIONS.json
    │       └── (name)_(key)_(margin)_(resolution).xml
    ├── plots
    │   └── plots.SAMPLE
    │       └── (name)_(key).SAMPLE.png
    └── samples
        ├── samples.PROCESSED
        │   └── (name)_(key).PROCESSED.csv
        └── samples.RAW
            └── (name)_(key).RAW.csv

Images are loaded from **images**, which are then sampled and processed into
**samples**. ARCADE initialization files are then generated and placed into
**inits.ARCADE**.
"""

import copy
from dataclasses import dataclass, field
from typing import Optional

from arcade_collection.input import (
    convert_to_cells_file,
    convert_to_locations_file,
    generate_setup_file,
    merge_region_samples,
)
from container_collection.docker import (
    create_docker_volume,
    remove_docker_volume,
    run_docker_command,
)
from io_collection.keys import check_key, make_key
from io_collection.load import load_dataframe
from io_collection.save import save_json, save_text
from prefect import flow

from cell_abm_pipeline.__config__ import make_dotlist_from_config
from cell_abm_pipeline.flows.process_sample import ContextConfig as ContextConfigProcessSample
from cell_abm_pipeline.flows.process_sample import ParametersConfig as ParametersConfigProcessSample
from cell_abm_pipeline.flows.process_sample import SeriesConfig as SeriesConfigProcessSample
from cell_abm_pipeline.flows.sample_image import ContextConfig as ContextConfigSampleImage
from cell_abm_pipeline.flows.sample_image import ParametersConfig as ParametersConfigSampleImage
from cell_abm_pipeline.flows.sample_image import SeriesConfig as SeriesConfigSampleImage

# Command for running sample image flow.
SAMPLE_IMAGE_COMMAND = ["abmpipe", "sample-image", "::"]

# Command for running process sample flow.
PROCESS_SAMPLE_COMMAND = ["abmpipe", "process-sample", "::"]

# Default volume means and standard deviations in um^3.
VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1865.0, 517.0),
    "NUCLEUS": (543.0, 157.0),
}

# Default height means and standard deviations in um.
HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.75, 2.4),
    "NUCLEUS": (6.86, 1.7),
}

# Default critical volume means and standard deviations in um^3.
CRITICAL_VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1300.0, 200.0),
    "NUCLEUS": (400.0, 50.0),
}

# Default critical height means and standard deviations in um.
CRITICAL_HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.0, 2.0),
    "NUCLEUS": (6.5, 1.5),
}

# Default cell state phase thresholds.
STATE_THRESHOLDS: dict[str, float] = {
    "APOPTOTIC_LATE": 0.25,
    "APOPTOTIC_EARLY": 0.90,
    "PROLIFERATIVE_G1": 1.124,
    "PROLIFERATIVE_S": 1.726,
    "PROLIFERATIVE_G2": 1.969,
    "PROLIFERATIVE_M": 2,
}

# Default list of Cellular Potts Model Hamiltonian terms.
POTTS_TERMS: list[str] = [
    "volume",
    "adhesion",
]


[docs]@dataclass
class ParametersConfigConvertToArcade:
    """Parameter configuration for initialize ARCADE simulations subflow - convert to ARCADE."""

    regions: dict[str, str] = field(default_factory=lambda: {"DEFAULT": "%s"})
    """Subcellular region samples used to initialize voxels."""

    margins: tuple[int, int, int] = (0, 0, 0)
    """Margins around initial voxel positions."""

    volumes: dict = field(default_factory=lambda: VOLUMES)
    """Volume means and standard deviations in um^3."""

    heights: dict = field(default_factory=lambda: HEIGHTS)
    """Height means and standard deviations in um."""

    critical_volumes: dict = field(default_factory=lambda: CRITICAL_VOLUMES)
    """Critical volume means and standard deviations in um^3."""

    critical_heights: dict = field(default_factory=lambda: CRITICAL_HEIGHTS)
    """Critical height means and standard deviations in um."""

    state_thresholds: dict[str, float] = field(default_factory=lambda: STATE_THRESHOLDS)
    """Cell state phase thresholds."""

    potts_terms: list[str] = field(default_factory=lambda: POTTS_TERMS)
    """List of Cellular Potts Model Hamiltonian terms."""


[docs]@dataclass
class ParametersConfig:
    """Parameter configuration for initialize ARCADE simulations flow."""

    image: str
    """Name of pipeline image."""

    resolution: float
    """Distance between samples in um."""

    sample_images: dict[str, ParametersConfigSampleImage]
    """Configs for sample images flow, keyed by region."""

    process_samples: dict[str, ParametersConfigProcessSample]
    """Configs for process samples flow, keyed by region."""

    convert_to_arcade: ParametersConfigConvertToArcade = ParametersConfigConvertToArcade()
    """Convert to ARCADE configuration instance."""


[docs]@dataclass
class ContextConfig:
    """Context configuration for initialize ARCADE simulations flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

    reference_location: str
    """Location of reference file (local path or S3 bucket)."""

    access_key_id: Optional[str] = None
    """AWS access key id for accessing S3 in Docker image."""

    secret_access_key: Optional[str] = None
    """AWS secret access key for accessing S3 in Docker image."""


[docs]@dataclass
class SeriesConfig:
    """Series configuration for initialize ARCADE simulations flow."""

    name: str
    """Name of the simulation series."""

    reference_key: str
    """Key for reference file."""

    conditions: list
    """List of series condition dictionaries (must include unique condition "key")."""


[docs]@flow(name="initialize-arcade-simulations")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main initialize ARCADE simulations flow.

    Calls the following subflows, in order:

    1. :py:func:`run_flow_sample_images`
    2. :py:func:`run_flow_process_samples`
    3. :py:func:`run_flow_convert_to_arcade`
    """

    run_flow_sample_images(context, series, parameters)

    run_flow_process_samples(context, series, parameters)

    run_flow_convert_to_arcade(context, series, parameters)


[docs]@flow(name="initialize-arcade-simulations_sample-images")
def run_flow_sample_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for sampling images.

    Iterate through conditions to sample images for each specified channel. The
    subflow `sample_image` is run via Docker for each condition and channel
    combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    if context.working_location.startswith("s3://"):
        context_config = ContextConfigSampleImage(working_location=context.working_location)
    else:
        context_config = ContextConfigSampleImage(working_location="/mnt")

    series_config = SeriesConfigSampleImage(name=series.name)

    for fov in series.conditions:
        for _, sample_image in parameters.sample_images.items():
            parameters_config = copy.deepcopy(sample_image)
            parameters_config.key = parameters_config.key % fov["key"]
            parameters_config.resolution = parameters.resolution

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            sample_image_command = SAMPLE_IMAGE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, sample_image_command, **docker_args)

    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])


[docs]@flow(name="initialize-arcade-simulations_process-samples")
def run_flow_process_samples(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for processing samples.

    Iterate through conditions to process samples for each specified channel.
    The subflow `process_sample` is run via Docker for each condition and
    channel combination by passing in the subflow configuration as a dotlist.
    """
    docker_args = get_docker_arguments(context)

    if context.working_location.startswith("s3://"):
        context_config = ContextConfigProcessSample(working_location=context.working_location)
    else:
        context_config = ContextConfigProcessSample(working_location="/mnt")

    series_config = SeriesConfigProcessSample(name=series.name)
    resolution_key = f"R{round(parameters.resolution * 10):03d}"

    for fov in series.conditions:
        fov_key = fov["key"]

        for _, process_sample in parameters.process_samples.items():
            parameters_config = copy.deepcopy(process_sample)
            parameters_config.key = f"{parameters_config.key % fov_key}_{resolution_key}"

            if "include_ids" in fov:
                parameters_config.include_ids = fov["include_ids"]

            if "exclude_ids" in fov:
                parameters_config.exclude_ids = fov["exclude_ids"]

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            process_sample_command = PROCESS_SAMPLE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, process_sample_command, **docker_args)

    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])


[docs]@flow(name="initialize-arcade-simulations_convert-to-arcade")
def run_flow_convert_to_arcade(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for converting to ARCADE.

    Converted processed samples into the ARCADE .CELLS and .LOCATIONS formats,
    along with a basic simulation setup XML file.
    """

    samples_key = make_key(series.name, "samples", "samples.PROCESSED")
    inits_key = make_key(series.name, "inits", "inits.ARCADE")

    resolution = parameters.resolution
    resolution_key = f"R{round(resolution * 10):03d}"

    if check_key(context.reference_location, series.reference_key):
        reference = load_dataframe(context.reference_location, series.reference_key)

        volume_columns = [column for column in reference.columns if "volume" in column]
        reference[volume_columns] = reference[volume_columns] / resolution**3

        height_columns = [column for column in reference.columns if "height" in column]
        reference[height_columns] = reference[height_columns] / resolution
    else:
        reference = None

    volumes = {
        region: (values[0] / resolution**3, values[1] / resolution**3)
        for region, values in parameters.convert_to_arcade.volumes.items()
    }
    heights = {
        region: (values[0] / resolution, values[1] / resolution)
        for region, values in parameters.convert_to_arcade.heights.items()
    }

    critical_volumes: dict[str, tuple[float, float]] = {
        region: (values[0] / resolution**3, values[1] / resolution**3)
        for region, values in parameters.convert_to_arcade.critical_volumes.items()
    }
    critical_heights: dict[str, tuple[float, float]] = {
        region: (values[0] / resolution, values[1] / resolution)
        for region, values in parameters.convert_to_arcade.critical_heights.items()
    }

    for fov in series.conditions:
        samples = {}

        for region, region_key_template in parameters.convert_to_arcade.regions.items():
            region_key = region_key_template % fov["key"]
            key = make_key(
                samples_key, f"{series.name}_{region_key}_{resolution_key}.PROCESSED.csv"
            )
            samples[region] = load_dataframe(context.working_location, key)

        margins = fov["margins"] if "margins" in fov else parameters.convert_to_arcade.margins
        merged_samples = merge_region_samples(samples, margins)
        x, y, z = margins
        key = f"{series.name}_{fov['key']}_X{x:03d}_Y{y:03d}_Z{z:03d}_{resolution_key}"

        cells = convert_to_cells_file(
            merged_samples,
            reference[reference["KEY"] == fov["key"]],
            volumes,
            heights,
            critical_volumes,
            critical_heights,
            parameters.convert_to_arcade.state_thresholds,
        )
        cells_key = make_key(inits_key, f"{key}.CELLS.json")
        save_json(context.working_location, cells_key, cells)

        locations = convert_to_locations_file(merged_samples)
        locations_key = make_key(inits_key, f"{key}.LOCATIONS.json")
        save_json(context.working_location, locations_key, locations)

        setup = generate_setup_file(
            merged_samples, margins, parameters.convert_to_arcade.potts_terms
        )
        setup_key = make_key(inits_key, f"{key}.xml")
        save_text(context.working_location, setup_key, setup)


[docs]def get_docker_arguments(context: ContextConfig) -> dict:
    """Compile Docker arguments for the given context."""

    if context.working_location.startswith("s3://"):
        environment = []

        if context.access_key_id is not None:
            environment.append(f"AWS_ACCESS_KEY_ID={context.access_key_id}")

        if context.secret_access_key is not None:
            environment.append(f"AWS_SECRET_ACCESS_KEY={context.secret_access_key}")

        docker_args = {"environment": environment}
    else:
        volume = create_docker_volume(context.working_location)
        docker_args = {"volume": volume}

    return docker_args