Coverage for src/cell_abm_pipeline/flows/initialize_arcade_simulations.py: 0%
163 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1"""
2Workflow for initializing ARCADE simulations.
4Working location structure:
6.. code-block:: bash
8 (name)
9 ├── images
10 │ └── (name)_(key).(extension)
11 ├── inits
12 │ └── inits.ARCADE
13 │ ├── (name)_(key)_(margin)_(resolution).CELLS.json
14 │ ├── (name)_(key)_(margin)_(resolution).LOCATIONS.json
15 │ └── (name)_(key)_(margin)_(resolution).xml
16 ├── plots
17 │ └── plots.SAMPLE
18 │ └── (name)_(key).SAMPLE.png
19 └── samples
20 ├── samples.PROCESSED
21 │ └── (name)_(key).PROCESSED.csv
22 └── samples.RAW
23 └── (name)_(key).RAW.csv
25Images are loaded from **images**, which are then sampled and processed into
26**samples**. ARCADE initialization files are then generated and placed into
27**inits.ARCADE**.
28"""
30import copy
31from dataclasses import dataclass, field
32from typing import Optional
34from arcade_collection.input import (
35 convert_to_cells_file,
36 convert_to_locations_file,
37 generate_setup_file,
38 merge_region_samples,
39)
40from container_collection.docker import (
41 create_docker_volume,
42 remove_docker_volume,
43 run_docker_command,
44)
45from io_collection.keys import check_key, make_key
46from io_collection.load import load_dataframe
47from io_collection.save import save_json, save_text
48from prefect import flow
50from cell_abm_pipeline.__config__ import make_dotlist_from_config
51from cell_abm_pipeline.flows.process_sample import ContextConfig as ContextConfigProcessSample
52from cell_abm_pipeline.flows.process_sample import ParametersConfig as ParametersConfigProcessSample
53from cell_abm_pipeline.flows.process_sample import SeriesConfig as SeriesConfigProcessSample
54from cell_abm_pipeline.flows.sample_image import ContextConfig as ContextConfigSampleImage
55from cell_abm_pipeline.flows.sample_image import ParametersConfig as ParametersConfigSampleImage
56from cell_abm_pipeline.flows.sample_image import SeriesConfig as SeriesConfigSampleImage
# Command template for running the sample image subflow via Docker; the dotlist
# configuration is appended after the "::" separator.
SAMPLE_IMAGE_COMMAND = ["abmpipe", "sample-image", "::"]

# Command template for running the process sample subflow via Docker; the
# dotlist configuration is appended after the "::" separator.
PROCESS_SAMPLE_COMMAND = ["abmpipe", "process-sample", "::"]

# Default volume means and standard deviations in um^3, keyed by region.
VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1865.0, 517.0),
    "NUCLEUS": (543.0, 157.0),
}

# Default height means and standard deviations in um, keyed by region.
HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.75, 2.4),
    "NUCLEUS": (6.86, 1.7),
}

# Default critical volume means and standard deviations in um^3, keyed by region.
CRITICAL_VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1300.0, 200.0),
    "NUCLEUS": (400.0, 50.0),
}

# Default critical height means and standard deviations in um, keyed by region.
CRITICAL_HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.0, 2.0),
    "NUCLEUS": (6.5, 1.5),
}

# Default cell state phase thresholds (presumably fractions relative to
# critical volume — confirm against arcade_collection's convert_to_cells_file).
STATE_THRESHOLDS: dict[str, float] = {
    "APOPTOTIC_LATE": 0.25,
    "APOPTOTIC_EARLY": 0.90,
    "PROLIFERATIVE_G1": 1.124,
    "PROLIFERATIVE_S": 1.726,
    "PROLIFERATIVE_G2": 1.969,
    "PROLIFERATIVE_M": 2,
}

# Default list of Cellular Potts Model Hamiltonian terms.
POTTS_TERMS: list[str] = [
    "volume",
    "adhesion",
]
@dataclass
class ParametersConfigConvertToArcade:
    """Parameter configuration for initialize ARCADE simulations subflow - convert to ARCADE."""

    regions: dict[str, str] = field(default_factory=lambda: {"DEFAULT": "%s"})
    """Subcellular region samples used to initialize voxels."""

    margins: tuple[int, int, int] = (0, 0, 0)
    """Margins around initial voxel positions."""

    # NOTE: the module-level defaults are copied in each factory so that
    # mutating one config instance cannot alter the shared constants (the
    # tuple values are immutable, so a shallow copy is sufficient).
    volumes: dict[str, tuple[float, float]] = field(default_factory=lambda: dict(VOLUMES))
    """Volume means and standard deviations in um^3."""

    heights: dict[str, tuple[float, float]] = field(default_factory=lambda: dict(HEIGHTS))
    """Height means and standard deviations in um."""

    critical_volumes: dict[str, tuple[float, float]] = field(
        default_factory=lambda: dict(CRITICAL_VOLUMES)
    )
    """Critical volume means and standard deviations in um^3."""

    critical_heights: dict[str, tuple[float, float]] = field(
        default_factory=lambda: dict(CRITICAL_HEIGHTS)
    )
    """Critical height means and standard deviations in um."""

    state_thresholds: dict[str, float] = field(default_factory=lambda: dict(STATE_THRESHOLDS))
    """Cell state phase thresholds."""

    potts_terms: list[str] = field(default_factory=lambda: list(POTTS_TERMS))
    """List of Cellular Potts Model Hamiltonian terms."""
@dataclass
class ParametersConfig:
    """Parameter configuration for initialize ARCADE simulations flow."""

    image: str
    """Name of pipeline image."""

    resolution: float
    """Distance between samples in um."""

    sample_images: dict[str, ParametersConfigSampleImage]
    """Configs for sample images flow, keyed by region."""

    process_samples: dict[str, ParametersConfigProcessSample]
    """Configs for process samples flow, keyed by region."""

    # Use a factory so each ParametersConfig gets its own subflow config
    # instead of all instances sharing one class-level mutable object; a
    # shared dataclass-instance default is also rejected by dataclasses on
    # Python >= 3.11 because the instance is unhashable.
    convert_to_arcade: ParametersConfigConvertToArcade = field(
        default_factory=ParametersConfigConvertToArcade
    )
    """Convert to ARCADE configuration instance."""
@dataclass
class ContextConfig:
    """Context configuration for the initialize ARCADE simulations flow."""

    working_location: str
    """Where input and output files live (local path or S3 bucket)."""

    reference_location: str
    """Where the reference file lives (local path or S3 bucket)."""

    access_key_id: Optional[str] = None
    """AWS access key id, forwarded into the Docker image for S3 access."""

    secret_access_key: Optional[str] = None
    """AWS secret access key, forwarded into the Docker image for S3 access."""
@dataclass
class SeriesConfig:
    """Series configuration for the initialize ARCADE simulations flow."""

    name: str
    """Simulation series name."""

    reference_key: str
    """Key identifying the reference file."""

    conditions: list
    """Series condition dictionaries; each must carry a unique condition "key"."""
@flow(name="initialize-arcade-simulations")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main initialize ARCADE simulations flow.

    Calls the following subflows, in order:

    1. :py:func:`run_flow_sample_images`
    2. :py:func:`run_flow_process_samples`
    3. :py:func:`run_flow_convert_to_arcade`
    """

    subflows = (
        run_flow_sample_images,
        run_flow_process_samples,
        run_flow_convert_to_arcade,
    )

    for subflow in subflows:
        subflow(context, series, parameters)
@flow(name="initialize-arcade-simulations_sample-images")
def run_flow_sample_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for sampling images.

    Iterate through conditions to sample images for each specified channel. The
    subflow `sample_image` is run via Docker for each condition and channel
    combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    # For local working locations, the container mounts the location at /mnt.
    if context.working_location.startswith("s3://"):
        context_config = ContextConfigSampleImage(working_location=context.working_location)
    else:
        context_config = ContextConfigSampleImage(working_location="/mnt")

    series_config = SeriesConfigSampleImage(name=series.name)

    for fov in series.conditions:
        # Only the config objects are used, so iterate values directly.
        for sample_image in parameters.sample_images.values():
            # Deep copy so the per-condition key substitution does not mutate
            # the shared parameter configuration.
            parameters_config = copy.deepcopy(sample_image)
            parameters_config.key = parameters_config.key % fov["key"]
            parameters_config.resolution = parameters.resolution

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            sample_image_command = SAMPLE_IMAGE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, sample_image_command, **docker_args)

    # Local runs create a temporary Docker volume; clean it up when done.
    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])
@flow(name="initialize-arcade-simulations_process-samples")
def run_flow_process_samples(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for processing samples.

    Iterate through conditions to process samples for each specified channel.
    The subflow `process_sample` is run via Docker for each condition and
    channel combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    # For local working locations, the container mounts the location at /mnt.
    if context.working_location.startswith("s3://"):
        context_config = ContextConfigProcessSample(working_location=context.working_location)
    else:
        context_config = ContextConfigProcessSample(working_location="/mnt")

    series_config = SeriesConfigProcessSample(name=series.name)
    # Encode resolution (um) as a zero-padded key, e.g. 1.0 um -> "R010".
    resolution_key = f"R{round(parameters.resolution * 10):03d}"

    for fov in series.conditions:
        fov_key = fov["key"]

        # Only the config objects are used, so iterate values directly.
        for process_sample in parameters.process_samples.values():
            # Deep copy so per-condition updates do not mutate the shared config.
            parameters_config = copy.deepcopy(process_sample)
            parameters_config.key = f"{parameters_config.key % fov_key}_{resolution_key}"

            # Optional per-condition cell id filters.
            if "include_ids" in fov:
                parameters_config.include_ids = fov["include_ids"]

            if "exclude_ids" in fov:
                parameters_config.exclude_ids = fov["exclude_ids"]

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            process_sample_command = PROCESS_SAMPLE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, process_sample_command, **docker_args)

    # Local runs create a temporary Docker volume; clean it up when done.
    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])
@flow(name="initialize-arcade-simulations_convert-to-arcade")
def run_flow_convert_to_arcade(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for converting to ARCADE.

    Convert processed samples into the ARCADE .CELLS and .LOCATIONS formats,
    along with a basic simulation setup XML file.
    """

    samples_key = make_key(series.name, "samples", "samples.PROCESSED")
    inits_key = make_key(series.name, "inits", "inits.ARCADE")

    resolution = parameters.resolution
    # Encode resolution (um) as a zero-padded key, e.g. 1.0 um -> "R010".
    resolution_key = f"R{round(resolution * 10):03d}"

    if check_key(context.reference_location, series.reference_key):
        reference = load_dataframe(context.reference_location, series.reference_key)

        # Rescale reference volumes (um^3) and heights (um) into voxel units.
        volume_columns = [column for column in reference.columns if "volume" in column]
        reference[volume_columns] = reference[volume_columns] / resolution**3

        height_columns = [column for column in reference.columns if "height" in column]
        reference[height_columns] = reference[height_columns] / resolution
    else:
        reference = None

    # Rescale configured distributions from physical units into voxel units.
    volumes = {
        region: (values[0] / resolution**3, values[1] / resolution**3)
        for region, values in parameters.convert_to_arcade.volumes.items()
    }
    heights = {
        region: (values[0] / resolution, values[1] / resolution)
        for region, values in parameters.convert_to_arcade.heights.items()
    }

    critical_volumes: dict[str, tuple[float, float]] = {
        region: (values[0] / resolution**3, values[1] / resolution**3)
        for region, values in parameters.convert_to_arcade.critical_volumes.items()
    }
    critical_heights: dict[str, tuple[float, float]] = {
        region: (values[0] / resolution, values[1] / resolution)
        for region, values in parameters.convert_to_arcade.critical_heights.items()
    }

    for fov in series.conditions:
        samples = {}

        # Load processed samples for each configured subcellular region.
        for region, region_key_template in parameters.convert_to_arcade.regions.items():
            region_key = region_key_template % fov["key"]
            key = make_key(
                samples_key, f"{series.name}_{region_key}_{resolution_key}.PROCESSED.csv"
            )
            samples[region] = load_dataframe(context.working_location, key)

        # Per-condition margins override the configured default.
        margins = fov.get("margins", parameters.convert_to_arcade.margins)
        merged_samples = merge_region_samples(samples, margins)
        x, y, z = margins
        key = f"{series.name}_{fov['key']}_X{x:03d}_Y{y:03d}_Z{z:03d}_{resolution_key}"

        # Guard against a missing reference file: the original subscripted
        # `reference` unconditionally, which raised TypeError when it was None.
        # Assumes convert_to_cells_file accepts a None reference — confirm
        # against arcade_collection.
        fov_reference = (
            reference[reference["KEY"] == fov["key"]] if reference is not None else None
        )

        cells = convert_to_cells_file(
            merged_samples,
            fov_reference,
            volumes,
            heights,
            critical_volumes,
            critical_heights,
            parameters.convert_to_arcade.state_thresholds,
        )
        cells_key = make_key(inits_key, f"{key}.CELLS.json")
        save_json(context.working_location, cells_key, cells)

        locations = convert_to_locations_file(merged_samples)
        locations_key = make_key(inits_key, f"{key}.LOCATIONS.json")
        save_json(context.working_location, locations_key, locations)

        setup = generate_setup_file(
            merged_samples, margins, parameters.convert_to_arcade.potts_terms
        )
        setup_key = make_key(inits_key, f"{key}.xml")
        save_text(context.working_location, setup_key, setup)
def get_docker_arguments(context: ContextConfig) -> dict:
    """Compile Docker arguments for the given context."""

    # Local working locations get a mounted Docker volume instead of
    # S3 credentials in the environment.
    if not context.working_location.startswith("s3://"):
        return {"volume": create_docker_volume(context.working_location)}

    credentials = {
        "AWS_ACCESS_KEY_ID": context.access_key_id,
        "AWS_SECRET_ACCESS_KEY": context.secret_access_key,
    }
    environment = [f"{name}={value}" for name, value in credentials.items() if value is not None]

    return {"environment": environment}