Coverage for src/cell_abm_pipeline/flows/initialize_arcade_simulations.py: 0%

163 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

"""
Workflow for initializing ARCADE simulations.

Working location structure:

.. code-block:: bash

    (name)
    ├── images
    │   └── (name)_(key).(extension)
    ├── inits
    │   └── inits.ARCADE
    │       ├── (name)_(key)_(margin)_(resolution).CELLS.json
    │       ├── (name)_(key)_(margin)_(resolution).LOCATIONS.json
    │       └── (name)_(key)_(margin)_(resolution).xml
    ├── plots
    │   └── plots.SAMPLE
    │       └── (name)_(key).SAMPLE.png
    └── samples
        ├── samples.PROCESSED
        │   └── (name)_(key).PROCESSED.csv
        └── samples.RAW
            └── (name)_(key).RAW.csv

Images are loaded from **images**, which are then sampled and processed into
**samples**. ARCADE initialization files are then generated and placed into
**inits.ARCADE**.
"""

29 

30import copy 

31from dataclasses import dataclass, field 

32from typing import Optional 

33 

34from arcade_collection.input import ( 

35 convert_to_cells_file, 

36 convert_to_locations_file, 

37 generate_setup_file, 

38 merge_region_samples, 

39) 

40from container_collection.docker import ( 

41 create_docker_volume, 

42 remove_docker_volume, 

43 run_docker_command, 

44) 

45from io_collection.keys import check_key, make_key 

46from io_collection.load import load_dataframe 

47from io_collection.save import save_json, save_text 

48from prefect import flow 

49 

50from cell_abm_pipeline.__config__ import make_dotlist_from_config 

51from cell_abm_pipeline.flows.process_sample import ContextConfig as ContextConfigProcessSample 

52from cell_abm_pipeline.flows.process_sample import ParametersConfig as ParametersConfigProcessSample 

53from cell_abm_pipeline.flows.process_sample import SeriesConfig as SeriesConfigProcessSample 

54from cell_abm_pipeline.flows.sample_image import ContextConfig as ContextConfigSampleImage 

55from cell_abm_pipeline.flows.sample_image import ParametersConfig as ParametersConfigSampleImage 

56from cell_abm_pipeline.flows.sample_image import SeriesConfig as SeriesConfigSampleImage 

57 

# Command for running sample image flow.
SAMPLE_IMAGE_COMMAND = ["abmpipe", "sample-image", "::"]

# Command for running process sample flow.
PROCESS_SAMPLE_COMMAND = ["abmpipe", "process-sample", "::"]

# Default volume means and standard deviations in um^3.
VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1865.0, 517.0),
    "NUCLEUS": (543.0, 157.0),
}

# Default height means and standard deviations in um.
HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.75, 2.4),
    "NUCLEUS": (6.86, 1.7),
}

# Default critical volume means and standard deviations in um^3.
CRITICAL_VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1300.0, 200.0),
    "NUCLEUS": (400.0, 50.0),
}

# Default critical height means and standard deviations in um.
CRITICAL_HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.0, 2.0),
    "NUCLEUS": (6.5, 1.5),
}

# Default cell state phase thresholds. Exact threshold semantics are defined
# by arcade_collection's convert_to_cells_file — TODO confirm interpretation.
STATE_THRESHOLDS: dict[str, float] = {
    "APOPTOTIC_LATE": 0.25,
    "APOPTOTIC_EARLY": 0.90,
    "PROLIFERATIVE_G1": 1.124,
    "PROLIFERATIVE_S": 1.726,
    "PROLIFERATIVE_M": 2.0,  # was bare int 2; float for consistency with annotation
    "PROLIFERATIVE_G2": 1.969,
}

# Default list of Cellular Potts Model Hamiltonian terms.
POTTS_TERMS: list[str] = [
    "volume",
    "adhesion",
]

103 

104 

@dataclass
class ParametersConfigConvertToArcade:
    """Parameter configuration for initialize ARCADE simulations subflow - convert to ARCADE."""

    regions: dict[str, str] = field(default_factory=lambda: {"DEFAULT": "%s"})
    """Subcellular region samples used to initialize voxels."""

    margins: tuple[int, int, int] = (0, 0, 0)
    """Margins around initial voxel positions."""

    # NOTE: the defaults below deep copy the module-level dictionaries. The
    # previous factories returned the shared module-level dicts themselves, so
    # every config instance aliased one mutable object — mutating one
    # instance's values silently changed the defaults for all instances.
    volumes: dict[str, tuple[float, float]] = field(default_factory=lambda: copy.deepcopy(VOLUMES))
    """Volume means and standard deviations in um^3."""

    heights: dict[str, tuple[float, float]] = field(default_factory=lambda: copy.deepcopy(HEIGHTS))
    """Height means and standard deviations in um."""

    critical_volumes: dict[str, tuple[float, float]] = field(
        default_factory=lambda: copy.deepcopy(CRITICAL_VOLUMES)
    )
    """Critical volume means and standard deviations in um^3."""

    critical_heights: dict[str, tuple[float, float]] = field(
        default_factory=lambda: copy.deepcopy(CRITICAL_HEIGHTS)
    )
    """Critical height means and standard deviations in um."""

    state_thresholds: dict[str, float] = field(
        default_factory=lambda: copy.deepcopy(STATE_THRESHOLDS)
    )
    """Cell state phase thresholds."""

    potts_terms: list[str] = field(default_factory=lambda: copy.deepcopy(POTTS_TERMS))
    """List of Cellular Potts Model Hamiltonian terms."""

132 

133 

@dataclass
class ParametersConfig:
    """Parameter configuration for initialize ARCADE simulations flow."""

    image: str
    """Name of pipeline image."""

    resolution: float
    """Distance between samples in um."""

    sample_images: dict[str, ParametersConfigSampleImage]
    """Configs for sample images flow, keyed by region."""

    process_samples: dict[str, ParametersConfigProcessSample]
    """Configs for process samples flow, keyed by region."""

    # Use default_factory so every ParametersConfig gets its own instance. The
    # previous class-level instance default was shared (mutable) state across
    # all configs, and raises ValueError on Python 3.11+ because dataclass
    # instances are unhashable by default.
    convert_to_arcade: ParametersConfigConvertToArcade = field(
        default_factory=ParametersConfigConvertToArcade
    )
    """Convert to ARCADE configuration instance."""

152 

153 

@dataclass
class ContextConfig:
    """
    Context configuration for initialize ARCADE simulations flow.

    Locations starting with ``s3://`` are treated as S3 buckets; any other
    value is treated as a local path and exposed to the Docker container via a
    mounted volume (see ``get_docker_arguments``).
    """

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

    reference_location: str
    """Location of reference file (local path or S3 bucket)."""

    access_key_id: Optional[str] = None
    """AWS access key id for accessing S3 in Docker image."""

    secret_access_key: Optional[str] = None
    """AWS secret access key for accessing S3 in Docker image."""

169 

170 

@dataclass
class SeriesConfig:
    """
    Series configuration for initialize ARCADE simulations flow.

    Each condition dictionary must contain a unique ``"key"`` entry and may
    carry optional per-condition overrides consumed by the subflows:
    ``"margins"``, ``"include_ids"``, and ``"exclude_ids"``.
    """

    name: str
    """Name of the simulation series."""

    reference_key: str
    """Key for reference file."""

    conditions: list[dict]
    """List of series condition dictionaries (must include unique condition "key")."""

183 

184 

@flow(name="initialize-arcade-simulations")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main initialize ARCADE simulations flow.

    Calls the following subflows, in order:

    1. :py:func:`run_flow_sample_images`
    2. :py:func:`run_flow_process_samples`
    3. :py:func:`run_flow_convert_to_arcade`
    """

    # Subflows run sequentially: each stage consumes the previous stage's
    # outputs (images -> samples -> ARCADE init files).
    subflows = (
        run_flow_sample_images,
        run_flow_process_samples,
        run_flow_convert_to_arcade,
    )

    for subflow in subflows:
        subflow(context, series, parameters)

202 

203 

@flow(name="initialize-arcade-simulations_sample-images")
def run_flow_sample_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for sampling images.

    Iterate through conditions to sample images for each specified channel. The
    subflow `sample_image` is run via Docker for each condition and channel
    combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    # Inside the container, a local working location is mounted at /mnt; S3
    # locations are passed through unchanged.
    if context.working_location.startswith("s3://"):
        context_config = ContextConfigSampleImage(working_location=context.working_location)
    else:
        context_config = ContextConfigSampleImage(working_location="/mnt")

    series_config = SeriesConfigSampleImage(name=series.name)

    for fov in series.conditions:
        # Only the config values are needed; region keys are unused here.
        for sample_image in parameters.sample_images.values():
            # Deep copy so per-condition key/resolution edits do not leak into
            # the shared parameter configuration.
            parameters_config = copy.deepcopy(sample_image)
            parameters_config.key = parameters_config.key % fov["key"]
            parameters_config.resolution = parameters.resolution

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            sample_image_command = SAMPLE_IMAGE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, sample_image_command, **docker_args)

    # Clean up the Docker volume created for local working locations.
    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])

242 

243 

@flow(name="initialize-arcade-simulations_process-samples")
def run_flow_process_samples(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for processing samples.

    Iterate through conditions to process samples for each specified channel.
    The subflow `process_sample` is run via Docker for each condition and
    channel combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    # Inside the container, a local working location is mounted at /mnt; S3
    # locations are passed through unchanged.
    if context.working_location.startswith("s3://"):
        context_config = ContextConfigProcessSample(working_location=context.working_location)
    else:
        context_config = ContextConfigProcessSample(working_location="/mnt")

    series_config = SeriesConfigProcessSample(name=series.name)
    # Encode resolution in tenths of um, e.g. resolution=1.5 -> "R015".
    resolution_key = f"R{round(parameters.resolution * 10):03d}"

    for fov in series.conditions:
        fov_key = fov["key"]

        # Only the config values are needed; region keys are unused here.
        for process_sample in parameters.process_samples.values():
            # Deep copy so per-condition edits do not leak into the shared
            # parameter configuration.
            parameters_config = copy.deepcopy(process_sample)
            parameters_config.key = f"{parameters_config.key % fov_key}_{resolution_key}"

            # Conditions may override which cell ids are processed.
            if "include_ids" in fov:
                parameters_config.include_ids = fov["include_ids"]

            if "exclude_ids" in fov:
                parameters_config.exclude_ids = fov["exclude_ids"]

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            process_sample_command = PROCESS_SAMPLE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, process_sample_command, **docker_args)

    # Clean up the Docker volume created for local working locations.
    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])

289 

290 

@flow(name="initialize-arcade-simulations_convert-to-arcade")
def run_flow_convert_to_arcade(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for converting to ARCADE.

    Converted processed samples into the ARCADE .CELLS and .LOCATIONS formats,
    along with a basic simulation setup XML file.
    """

    samples_key = make_key(series.name, "samples", "samples.PROCESSED")
    inits_key = make_key(series.name, "inits", "inits.ARCADE")

    resolution = parameters.resolution
    # Encode resolution in tenths of um, e.g. resolution=1.5 -> "R015".
    resolution_key = f"R{round(resolution * 10):03d}"

    if check_key(context.reference_location, series.reference_key):
        reference = load_dataframe(context.reference_location, series.reference_key)

        # Convert reference measurements from um / um^3 into voxel units.
        volume_columns = [column for column in reference.columns if "volume" in column]
        reference[volume_columns] = reference[volume_columns] / resolution**3

        height_columns = [column for column in reference.columns if "height" in column]
        reference[height_columns] = reference[height_columns] / resolution
    else:
        reference = None

    # Convert (mean, std) distribution parameters into voxel units.
    convert = parameters.convert_to_arcade
    volumes = _scale_distributions(convert.volumes, resolution**3)
    heights = _scale_distributions(convert.heights, resolution)
    critical_volumes = _scale_distributions(convert.critical_volumes, resolution**3)
    critical_heights = _scale_distributions(convert.critical_heights, resolution)

    for fov in series.conditions:
        samples = {}

        for region, region_key_template in convert.regions.items():
            region_key = region_key_template % fov["key"]
            key = make_key(
                samples_key, f"{series.name}_{region_key}_{resolution_key}.PROCESSED.csv"
            )
            samples[region] = load_dataframe(context.working_location, key)

        # Conditions may override the default margins.
        margins = fov.get("margins", convert.margins)
        merged_samples = merge_region_samples(samples, margins)
        x, y, z = margins
        key = f"{series.name}_{fov['key']}_X{x:03d}_Y{y:03d}_Z{z:03d}_{resolution_key}"

        # BUGFIX: previously `reference[...]` was evaluated unconditionally and
        # raised TypeError when no reference file existed (reference is None).
        # NOTE(review): assumes convert_to_cells_file accepts a None reference
        # — confirm against arcade_collection; at worst this matches the
        # original failure mode one call later.
        fov_reference = None if reference is None else reference[reference["KEY"] == fov["key"]]

        cells = convert_to_cells_file(
            merged_samples,
            fov_reference,
            volumes,
            heights,
            critical_volumes,
            critical_heights,
            convert.state_thresholds,
        )
        cells_key = make_key(inits_key, f"{key}.CELLS.json")
        save_json(context.working_location, cells_key, cells)

        locations = convert_to_locations_file(merged_samples)
        locations_key = make_key(inits_key, f"{key}.LOCATIONS.json")
        save_json(context.working_location, locations_key, locations)

        setup = generate_setup_file(merged_samples, margins, convert.potts_terms)
        setup_key = make_key(inits_key, f"{key}.xml")
        save_text(context.working_location, setup_key, setup)


def _scale_distributions(
    distributions: dict[str, tuple[float, float]], factor: float
) -> dict[str, tuple[float, float]]:
    """Divide each region's (mean, std) pair by the given unit-conversion factor."""
    return {
        region: (mean / factor, std / factor) for region, (mean, std) in distributions.items()
    }

373 

374 

def get_docker_arguments(context: ContextConfig) -> dict:
    """
    Compile Docker arguments for the given context.

    S3-backed working locations get AWS credentials (when present) forwarded
    as container environment variables; local working locations get a Docker
    volume created for mounting instead.
    """

    if not context.working_location.startswith("s3://"):
        # Local paths are exposed to the container through a named volume.
        return {"volume": create_docker_volume(context.working_location)}

    # Forward only the credentials that are actually set.
    credentials = {
        "AWS_ACCESS_KEY_ID": context.access_key_id,
        "AWS_SECRET_ACCESS_KEY": context.secret_access_key,
    }
    environment = [f"{name}={value}" for name, value in credentials.items() if value is not None]

    return {"environment": environment}