Coverage for src/cell_abm_pipeline/flows/process_sample.py: 0%
61 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
"""
Workflow for processing image samples.

Working location structure:

.. code-block:: bash

    (name)
    ├── plots
    │   └── plots.SAMPLE
    │       └── (name)_(key).SAMPLE.png
    └── samples
        ├── samples.PROCESSED
        │   └── (name)_(key).PROCESSED.csv
        └── samples.RAW
            └── (name)_(key).RAW.csv

Samples to be processed are loaded from **samples.RAW**. Resulting processed
sample(s) are placed into **samples.PROCESSED** and corresponding contact
sheet(s) are placed into **plots.SAMPLE**. Note that these contact sheet(s) will
overwrite existing contact sheets generated by the sample images task.
"""
24from dataclasses import dataclass
25from typing import Optional
27from abm_initialization_collection.image import plot_contact_sheet
28from abm_initialization_collection.sample import (
29 exclude_selected_ids,
30 include_selected_ids,
31 remove_edge_regions,
32 remove_unconnected_regions,
33)
34from io_collection.keys import make_key
35from io_collection.load import load_dataframe
36from io_collection.save import save_dataframe, save_figure
37from prefect import flow
# Default distance for removing unconnected regions.
UNCONNECTED_THRESHOLD: float = 2.0

# Default number of edge positions per axis needed to assign edge region.
EDGE_THRESHOLD: int = 1

# Default distance from axis limits to assign edge positions.
EDGE_PADDING: float = 1.0
@dataclass
class ParametersConfig:
    """
    Parameter configuration for process sample flow.

    Only ``key`` is required. The threshold and padding fields default to the
    module-level ``UNCONNECTED_THRESHOLD``, ``EDGE_THRESHOLD`` and
    ``EDGE_PADDING`` constants.
    """

    key: str
    """Sample key to process."""

    remove_unconnected: bool = True
    """True to remove unconnected regions, False otherwise."""

    unconnected_threshold: float = UNCONNECTED_THRESHOLD
    """Distance for removing unconnected regions."""

    unconnected_filter: str = "connectivity"
    """Filter type for assigning unconnected coordinates."""

    remove_edges: bool = True
    """True to remove cells touching the edge of the bounds, False otherwise."""

    edge_threshold: int = EDGE_THRESHOLD
    """Number of edge positions per axis needed to assign edge region."""

    edge_padding: float = EDGE_PADDING
    """Distance from axis limits to assign edge positions."""

    include_ids: Optional[list[int]] = None
    """List of ids to include (None to skip the include step)."""

    exclude_ids: Optional[list[int]] = None
    """List of ids to exclude (None to skip the exclude step)."""

    contact_sheet: bool = True
    """True to save contact sheet of processed samples, False otherwise."""
@dataclass
class ContextConfig:
    """Context configuration for process sample flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""
@dataclass
class SeriesConfig:
    """Series configuration for process sample flow."""

    name: str
    """Name of the simulation series."""
@flow(name="process-sample")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main process sample flow.

    Loads the raw samples for the given series name and sample key from
    **samples.RAW**, applies each enabled processing step in order (remove
    unconnected regions, remove edge regions, include selected ids, exclude
    selected ids), and saves the result to **samples.PROCESSED**. If
    ``parameters.contact_sheet`` is set, a contact sheet of the processed
    samples is also saved to **plots.SAMPLE**.

    Parameters
    ----------
    context
        Context configuration; provides the working location for all input
        and output files.
    series
        Series configuration; provides the series name used to build keys.
    parameters
        Parameter configuration; selects the sample key and which processing
        steps to apply.
    """

    item_key = f"{series.name}_{parameters.key}"
    sample_key = make_key(series.name, "samples", "samples.RAW", f"{item_key}.RAW.csv")

    raw_samples = load_dataframe(context.working_location, sample_key)

    # Process a copy: the raw samples are passed alongside the processed
    # samples when plotting the contact sheet below.
    processed_samples = raw_samples.copy()

    if parameters.remove_unconnected:
        processed_samples = remove_unconnected_regions(
            processed_samples, parameters.unconnected_threshold, parameters.unconnected_filter
        )

    if parameters.remove_edges:
        processed_samples = remove_edge_regions(
            processed_samples, parameters.edge_threshold, parameters.edge_padding
        )

    # Id filters are skipped only when None; an explicitly provided (even
    # empty) list is still passed through to the filter.
    if parameters.include_ids is not None:
        processed_samples = include_selected_ids(processed_samples, parameters.include_ids)

    if parameters.exclude_ids is not None:
        processed_samples = exclude_selected_ids(processed_samples, parameters.exclude_ids)

    processed_key = make_key(
        series.name, "samples", "samples.PROCESSED", f"{item_key}.PROCESSED.csv"
    )
    save_dataframe(context.working_location, processed_key, processed_samples, index=False)

    if parameters.contact_sheet:
        contact_sheet = plot_contact_sheet(processed_samples, raw_samples)
        plot_key = make_key(series.name, "plots", "plots.SAMPLE", f"{item_key}.SAMPLE.png")
        save_figure(context.working_location, plot_key, contact_sheet)