Source code for cell_abm_pipeline.flows.calculate_image_properties
"""
Workflow for calculating shape properties from existing images.
Working location structure:
.. code-block:: bash
(name)
├── results
│ └── (name)_(key)_(seed).csv
└── calculations
└── calculations.PROPERTIES
├── (name)_(key)_(seed)_(tick).PROPERTIES.csv
└── (name)_(key)_(seed)_(tick)_(region).PROPERTIES.csv
Data from **results** are used to specify existing images, which are then used
to calculate properties. Calculations are saved to **calculations.PROPERTIES**.
If region is specified, the region is included in the output key.
"""
from dataclasses import dataclass, field
from typing import Optional
import pandas as pd
from abm_shape_collection import get_shape_properties
from io_collection.keys import make_key
from io_collection.load import load_dataframe, load_image
from io_collection.save import save_dataframe
from prefect import flow
from cell_abm_pipeline.flows.calculate_properties import SHAPE_PROPERTIES
[docs]@dataclass
class ParametersConfig:
"""Parameter configuration for calculate image properties flow."""
key: str
"""Simulation key to calculate."""
seed: int
"""Simulation random seed to calculate."""
tick: int
"""Simulation tick to calculate."""
channel: int
"""Index of channel to calculate."""
region: Optional[str] = None
"""Subcellular region to calculate."""
properties: list[str] = field(default_factory=lambda: SHAPE_PROPERTIES)
"""List of shape properties to calculate."""
[docs]@dataclass
class ContextConfig:
"""Context configuration for calculate image properties flow."""
working_location: str
"""Location for input and output files (local path or S3 bucket)."""
[docs]@dataclass
class SeriesConfig:
"""Series configuration for calculate image properties flow."""
name: str
"""Name of the simulation series."""
[docs]@flow(name="calculate-image-properties")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
"""Main calculate image properties flow."""
calc_key = make_key(series.name, "calculations", "calculations.PROPERTIES")
series_key = f"{series.name}_{parameters.key}_{parameters.seed:04d}"
results_key = make_key(series.name, "results", f"{series_key}.csv")
results = load_dataframe(context.working_location, results_key)
all_props = []
for cell_id, image_file in results[results["TICK"] == parameters.tick][["ID", "IMAGE"]].values:
image = load_image("s3://allencell", f"aics/hipsc_single_cell_image_dataset/{image_file}")
array = image.get_image_data("ZYX", T=0, C=parameters.channel)
props = get_shape_properties(array, parameters.properties)
props["KEY"] = parameters.key
props["ID"] = cell_id
props["SEED"] = parameters.seed
props["TICK"] = parameters.tick
all_props.append(props)
props_dataframe = pd.DataFrame(all_props)
region_key = f"_{parameters.region}" if parameters.region is not None else ""
suffix = region_key
props_key = make_key(calc_key, f"{series_key}_{parameters.tick:06d}{suffix}.PROPERTIES.csv")
save_dataframe(context.working_location, props_key, props_dataframe, index=False)