Source code for cell_abm_pipeline.flows.process_sample

"""
Workflow for processing image samples.

Working location structure:

.. code-block:: bash

    (name)
    ├── plots
    │   └── plots.SAMPLE
    │       └── (name)_(key).SAMPLE.png
    └── samples
        ├── samples.PROCESSED
        │   └── (name)_(key).PROCESSED.csv
        └── samples.RAW
            └── (name)_(key).RAW.csv

Samples to be processed are loaded from **samples.RAW**. Resulting processed
sample(s) are placed into **samples.PROCESSED** and corresponding contact
sheet(s) are placed into **plots.SAMPLE**. Note that these contact sheet(s) will
overwrite existing contact sheets generated by the sample images task.
"""

from dataclasses import dataclass
from typing import Optional

from abm_initialization_collection.image import plot_contact_sheet
from abm_initialization_collection.sample import (
    exclude_selected_ids,
    include_selected_ids,
    remove_edge_regions,
    remove_unconnected_regions,
)
from io_collection.keys import make_key
from io_collection.load import load_dataframe
from io_collection.save import save_dataframe, save_figure
from prefect import flow

# Default distance for removing unconnected regions.
UNCONNECTED_THRESHOLD: float = 2.0

# Default number of edge positions per axis needed to assign edge region.
EDGE_THRESHOLD: int = 1

# Default distance from axis limits to assign edge positions.
EDGE_PADDING: float = 1.0


@dataclass
class ParametersConfig:
    """Parameter configuration for process sample flow."""

    key: str
    """Sample key to process."""

    remove_unconnected: bool = True
    """True to remove unconnected regions, False otherwise."""

    unconnected_threshold: float = UNCONNECTED_THRESHOLD
    """Distance for removing unconnected regions."""

    unconnected_filter: str = "connectivity"
    """Filter type for assigning unconnected coordinates."""

    remove_edges: bool = True
    """True to remove cells touching the edge of the bounds, False otherwise."""

    edge_threshold: int = EDGE_THRESHOLD
    """Number of edge positions per axis needed to assign edge region."""

    edge_padding: float = EDGE_PADDING
    """Distance from axis limits to assign edge positions."""

    include_ids: Optional[list[int]] = None
    """List of ids to include."""

    exclude_ids: Optional[list[int]] = None
    """List of ids to exclude."""

    contact_sheet: bool = True
    """True to save contact sheet of processed samples, False otherwise."""


@dataclass
class ContextConfig:
    """Context configuration for process sample flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""


@dataclass
class SeriesConfig:
    """Series configuration for process sample flow."""

    name: str
    """Name of the simulation series."""


@flow(name="process-sample")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main process sample flow.

    Loads the raw samples for the given series name and sample key from
    **samples.RAW**, applies each enabled processing step in order (remove
    unconnected regions, remove edge regions, include selected ids, exclude
    selected ids), and saves the result to **samples.PROCESSED**. If
    ``parameters.contact_sheet`` is set, a contact sheet built from the
    processed and raw samples is also saved to **plots.SAMPLE**.

    Parameters
    ----------
    context
        Context configuration; provides the working location used for all
        loads and saves.
    series
        Series configuration; provides the series name used to build keys.
    parameters
        Parameter configuration; selects the sample key and which processing
        steps to apply.
    """

    # Shared "(name)_(key)" stem used in every input and output file name.
    item_key = f"{series.name}_{parameters.key}"

    sample_key = make_key(series.name, "samples", "samples.RAW", f"{item_key}.RAW.csv")
    raw_samples = load_dataframe(context.working_location, sample_key)

    # Work on a copy; the raw samples are passed alongside the processed
    # samples when plotting the contact sheet below.
    processed_samples = raw_samples.copy()

    if parameters.remove_unconnected:
        processed_samples = remove_unconnected_regions(
            processed_samples, parameters.unconnected_threshold, parameters.unconnected_filter
        )

    if parameters.remove_edges:
        processed_samples = remove_edge_regions(
            processed_samples, parameters.edge_threshold, parameters.edge_padding
        )

    # An explicit None check means an empty id list is still passed to the filter.
    if parameters.include_ids is not None:
        processed_samples = include_selected_ids(processed_samples, parameters.include_ids)

    if parameters.exclude_ids is not None:
        processed_samples = exclude_selected_ids(processed_samples, parameters.exclude_ids)

    processed_key = make_key(
        series.name, "samples", "samples.PROCESSED", f"{item_key}.PROCESSED.csv"
    )
    save_dataframe(context.working_location, processed_key, processed_samples, index=False)

    if parameters.contact_sheet:
        contact_sheet = plot_contact_sheet(processed_samples, raw_samples)
        plot_key = make_key(series.name, "plots", "plots.SAMPLE", f"{item_key}.SAMPLE.png")
        save_figure(context.working_location, plot_key, contact_sheet)