"""
Workflow for processing image samples.

Working location structure:

.. code-block:: bash

    (name)
    ├── plots
    │   └── plots.SAMPLE
    │       └── (name)_(key).SAMPLE.png
    └── samples
        ├── samples.PROCESSED
        │   └── (name)_(key).PROCESSED.csv
        └── samples.RAW
            └── (name)_(key).RAW.csv

Samples to be processed are loaded from **samples.RAW**. Resulting processed
sample(s) are placed into **samples.PROCESSED** and corresponding contact
sheet(s) are placed into **plots.SAMPLE**. Note that these contact sheet(s) will
overwrite existing contact sheets generated by the sample images task.
"""
from dataclasses import dataclass
from typing import Optional
from abm_initialization_collection.image import plot_contact_sheet
from abm_initialization_collection.sample import (
exclude_selected_ids,
include_selected_ids,
remove_edge_regions,
remove_unconnected_regions,
)
from io_collection.keys import make_key
from io_collection.load import load_dataframe
from io_collection.save import save_dataframe, save_figure
from prefect import flow
# Default distance for removing unconnected regions. Used as the default value
# of ``ParametersConfig.unconnected_threshold``.
UNCONNECTED_THRESHOLD: float = 2.0

# Default number of edge positions per axis needed to assign edge region. Used
# as the default value of ``ParametersConfig.edge_threshold``.
EDGE_THRESHOLD: int = 1

# Default distance from axis limits to assign edge positions. Used as the
# default value of ``ParametersConfig.edge_padding``.
EDGE_PADDING: float = 1.0
@dataclass
class ParametersConfig:
    """
    Parameter configuration for process sample flow.

    Each processing step of the flow is controlled by one group of fields:
    the ``remove_unconnected`` and ``remove_edges`` flags enable their steps,
    while the id filters are applied only when a list is provided (``None``
    skips the corresponding filter).
    """

    key: str
    """Sample key to process."""

    remove_unconnected: bool = True
    """True to remove unconnected regions, False otherwise."""

    unconnected_threshold: float = UNCONNECTED_THRESHOLD
    """Distance for removing unconnected regions."""

    unconnected_filter: str = "connectivity"
    """Filter type for assigning unconnected coordinates."""

    remove_edges: bool = True
    """True to remove cells touching the edge of the bounds, False otherwise."""

    edge_threshold: int = EDGE_THRESHOLD
    """Number of edge positions per axis needed to assign edge region."""

    edge_padding: float = EDGE_PADDING
    """Distance from axis limits to assign edge positions."""

    include_ids: Optional[list[int]] = None
    """List of ids to include, or None to skip the include filter."""

    exclude_ids: Optional[list[int]] = None
    """List of ids to exclude, or None to skip the exclude filter."""

    contact_sheet: bool = True
    """True to save contact sheet of processed samples, False otherwise."""
@dataclass
class ContextConfig:
    """Context configuration for process sample flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""
@dataclass
class SeriesConfig:
    """Series configuration for process sample flow."""

    name: str
    """Name of the simulation series."""
@flow(name="process-sample")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main process sample flow.

    Loads the raw samples for the selected key from **samples.RAW**, applies
    the enabled processing steps in order (remove unconnected regions, remove
    edge regions, include selected ids, exclude selected ids), and saves the
    result to **samples.PROCESSED**. If requested, a contact sheet plotted from
    the processed and raw samples is saved to **plots.SAMPLE**.

    Parameters
    ----------
    context
        Context configuration providing the working location.
    series
        Series configuration providing the series name used in file keys.
    parameters
        Parameter configuration selecting the sample key and processing steps.
    """

    # All files for this sample share the "(name)_(key)" prefix.
    item_key = f"{series.name}_{parameters.key}"

    sample_key = make_key(series.name, "samples", "samples.RAW", f"{item_key}.RAW.csv")
    raw_samples = load_dataframe(context.working_location, sample_key)

    # Work on a copy; the raw samples are passed to the contact sheet plot below.
    processed_samples = raw_samples.copy()

    if parameters.remove_unconnected:
        processed_samples = remove_unconnected_regions(
            processed_samples, parameters.unconnected_threshold, parameters.unconnected_filter
        )

    if parameters.remove_edges:
        processed_samples = remove_edge_regions(
            processed_samples, parameters.edge_threshold, parameters.edge_padding
        )

    # Id filters are applied only when a list is given; None skips the filter.
    if parameters.include_ids is not None:
        processed_samples = include_selected_ids(processed_samples, parameters.include_ids)

    if parameters.exclude_ids is not None:
        processed_samples = exclude_selected_ids(processed_samples, parameters.exclude_ids)

    processed_key = make_key(
        series.name, "samples", "samples.PROCESSED", f"{item_key}.PROCESSED.csv"
    )
    save_dataframe(context.working_location, processed_key, processed_samples, index=False)

    if parameters.contact_sheet:
        contact_sheet = plot_contact_sheet(processed_samples, raw_samples)
        plot_key = make_key(series.name, "plots", "plots.SAMPLE", f"{item_key}.SAMPLE.png")
        save_figure(context.working_location, plot_key, contact_sheet)