Coverage for src/cell_abm_pipeline/flows/initialize_arcade_simulations.py: 0%

163 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

"""
Workflow for initializing ARCADE simulations.

Working location structure:

.. code-block:: bash

    (name)
    ├── images
    │   └── (name)_(key).(extension)
    ├── inits
    │   └── inits.ARCADE
    │       ├── (name)_(key)_(margin)_(resolution).CELLS.json
    │       ├── (name)_(key)_(margin)_(resolution).LOCATIONS.json
    │       └── (name)_(key)_(margin)_(resolution).xml
    ├── plots
    │   └── plots.SAMPLE
    │       └── (name)_(key).SAMPLE.png
    └── samples
        ├── samples.PROCESSED
        │   └── (name)_(key).PROCESSED.csv
        └── samples.RAW
            └── (name)_(key).RAW.csv

Images are loaded from **images**, which are then sampled and processed into
**samples**. ARCADE initialization files are then generated and placed into
**inits.ARCADE**.
"""

29 

30import copy 

31from dataclasses import dataclass, field 

32from typing import Optional 

33 

34from arcade_collection.input import ( 

35 convert_to_cells_file, 

36 convert_to_locations_file, 

37 generate_setup_file, 

38 merge_region_samples, 

39) 

40from container_collection.docker import ( 

41 create_docker_volume, 

42 remove_docker_volume, 

43 run_docker_command, 

44) 

45from io_collection.keys import check_key, make_key 

46from io_collection.load import load_dataframe 

47from io_collection.save import save_json, save_text 

48from prefect import flow 

49 

50from cell_abm_pipeline.__config__ import make_dotlist_from_config 

51from cell_abm_pipeline.flows.process_sample import ContextConfig as ContextConfigProcessSample 

52from cell_abm_pipeline.flows.process_sample import ParametersConfig as ParametersConfigProcessSample 

53from cell_abm_pipeline.flows.process_sample import SeriesConfig as SeriesConfigProcessSample 

54from cell_abm_pipeline.flows.sample_image import ContextConfig as ContextConfigSampleImage 

55from cell_abm_pipeline.flows.sample_image import ParametersConfig as ParametersConfigSampleImage 

56from cell_abm_pipeline.flows.sample_image import SeriesConfig as SeriesConfigSampleImage 

57 

# Command for running sample image flow.
SAMPLE_IMAGE_COMMAND = ["abmpipe", "sample-image", "::"]

# Command for running process sample flow.
PROCESS_SAMPLE_COMMAND = ["abmpipe", "process-sample", "::"]

# Default volume means and standard deviations in um^3.
VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1865.0, 517.0),
    "NUCLEUS": (543.0, 157.0),
}

# Default height means and standard deviations in um.
HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.75, 2.4),
    "NUCLEUS": (6.86, 1.7),
}

# Default critical volume means and standard deviations in um^3.
CRITICAL_VOLUMES: dict[str, tuple[float, float]] = {
    "DEFAULT": (1300.0, 200.0),
    "NUCLEUS": (400.0, 50.0),
}

# Default critical height means and standard deviations in um.
CRITICAL_HEIGHTS: dict[str, tuple[float, float]] = {
    "DEFAULT": (9.0, 2.0),
    "NUCLEUS": (6.5, 1.5),
}

# Default cell state phase thresholds. Exact threshold semantics are defined
# by arcade_collection's convert_to_cells_file — TODO confirm interpretation.
STATE_THRESHOLDS: dict[str, float] = {
    "APOPTOTIC_LATE": 0.25,
    "APOPTOTIC_EARLY": 0.90,
    "PROLIFERATIVE_G1": 1.124,
    "PROLIFERATIVE_S": 1.726,
    "PROLIFERATIVE_M": 2.0,  # was bare int 2; float for consistency with annotation
    "PROLIFERATIVE_G2": 1.969,
}

# Default list of Cellular Potts Model Hamiltonian terms.
POTTS_TERMS: list[str] = [
    "volume",
    "adhesion",
]

103 

104 

@dataclass
class ParametersConfigConvertToArcade:
    """Parameter configuration for initialize ARCADE simulations subflow - convert to ARCADE."""

    regions: dict[str, str] = field(default_factory=lambda: {"DEFAULT": "%s"})
    """Subcellular region samples used to initialize voxels."""

    margins: tuple[int, int, int] = (0, 0, 0)
    """Margins around initial voxel positions."""

    # NOTE: the defaults below deep copy the module-level dictionaries. The
    # previous factories returned the shared module-level dicts themselves, so
    # every config instance aliased one mutable object — mutating one
    # instance's values silently changed the defaults for all instances.
    volumes: dict[str, tuple[float, float]] = field(default_factory=lambda: copy.deepcopy(VOLUMES))
    """Volume means and standard deviations in um^3."""

    heights: dict[str, tuple[float, float]] = field(default_factory=lambda: copy.deepcopy(HEIGHTS))
    """Height means and standard deviations in um."""

    critical_volumes: dict[str, tuple[float, float]] = field(
        default_factory=lambda: copy.deepcopy(CRITICAL_VOLUMES)
    )
    """Critical volume means and standard deviations in um^3."""

    critical_heights: dict[str, tuple[float, float]] = field(
        default_factory=lambda: copy.deepcopy(CRITICAL_HEIGHTS)
    )
    """Critical height means and standard deviations in um."""

    state_thresholds: dict[str, float] = field(
        default_factory=lambda: copy.deepcopy(STATE_THRESHOLDS)
    )
    """Cell state phase thresholds."""

    potts_terms: list[str] = field(default_factory=lambda: copy.deepcopy(POTTS_TERMS))
    """List of Cellular Potts Model Hamiltonian terms."""

132 

133 

@dataclass
class ParametersConfig:
    """Parameter configuration for initialize ARCADE simulations flow."""

    image: str
    """Name of pipeline image."""

    resolution: float
    """Distance between samples in um."""

    sample_images: dict[str, ParametersConfigSampleImage]
    """Configs for sample images flow, keyed by region."""

    process_samples: dict[str, ParametersConfigProcessSample]
    """Configs for process samples flow, keyed by region."""

    # Use default_factory so every ParametersConfig gets its own instance. The
    # previous class-level instance default was shared (mutable) state across
    # all configs, and raises ValueError on Python 3.11+ because dataclass
    # instances are unhashable by default.
    convert_to_arcade: ParametersConfigConvertToArcade = field(
        default_factory=ParametersConfigConvertToArcade
    )
    """Convert to ARCADE configuration instance."""

152 

153 

@dataclass
class ContextConfig:
    """
    Context configuration for initialize ARCADE simulations flow.

    Locations starting with ``s3://`` are treated as S3 buckets; any other
    value is treated as a local path and exposed to the Docker container via a
    mounted volume (see ``get_docker_arguments``).
    """

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

    reference_location: str
    """Location of reference file (local path or S3 bucket)."""

    access_key_id: Optional[str] = None
    """AWS access key id for accessing S3 in Docker image."""

    secret_access_key: Optional[str] = None
    """AWS secret access key for accessing S3 in Docker image."""

169 

170 

@dataclass
class SeriesConfig:
    """
    Series configuration for initialize ARCADE simulations flow.

    Each condition dictionary must contain a unique ``"key"`` entry and may
    carry optional per-condition overrides consumed by the subflows:
    ``"margins"``, ``"include_ids"``, and ``"exclude_ids"``.
    """

    name: str
    """Name of the simulation series."""

    reference_key: str
    """Key for reference file."""

    conditions: list[dict]
    """List of series condition dictionaries (must include unique condition "key")."""

183 

184 

@flow(name="initialize-arcade-simulations")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main initialize ARCADE simulations flow.

    Calls the following subflows, in order:

    1. :py:func:`run_flow_sample_images`
    2. :py:func:`run_flow_process_samples`
    3. :py:func:`run_flow_convert_to_arcade`
    """

    # Subflows run sequentially: each stage consumes the previous stage's
    # outputs (images -> samples -> ARCADE init files).
    subflows = (
        run_flow_sample_images,
        run_flow_process_samples,
        run_flow_convert_to_arcade,
    )

    for subflow in subflows:
        subflow(context, series, parameters)

202 

203 

@flow(name="initialize-arcade-simulations_sample-images")
def run_flow_sample_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for sampling images.

    Iterate through conditions to sample images for each specified channel. The
    subflow `sample_image` is run via Docker for each condition and channel
    combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    # Inside the container, a local working location is mounted at /mnt; S3
    # locations are passed through unchanged.
    if context.working_location.startswith("s3://"):
        context_config = ContextConfigSampleImage(working_location=context.working_location)
    else:
        context_config = ContextConfigSampleImage(working_location="/mnt")

    series_config = SeriesConfigSampleImage(name=series.name)

    for fov in series.conditions:
        # Only the config values are needed; region keys are unused here.
        for sample_image in parameters.sample_images.values():
            # Deep copy so per-condition key/resolution edits do not leak into
            # the shared parameter configuration.
            parameters_config = copy.deepcopy(sample_image)
            parameters_config.key = parameters_config.key % fov["key"]
            parameters_config.resolution = parameters.resolution

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            sample_image_command = SAMPLE_IMAGE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, sample_image_command, **docker_args)

    # Clean up the Docker volume created for local working locations.
    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])

242 

243 

@flow(name="initialize-arcade-simulations_process-samples")
def run_flow_process_samples(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for processing samples.

    Iterate through conditions to process samples for each specified channel.
    The subflow `process_sample` is run via Docker for each condition and
    channel combination by passing in the subflow configuration as a dotlist.
    """

    docker_args = get_docker_arguments(context)

    # Inside the container, a local working location is mounted at /mnt; S3
    # locations are passed through unchanged.
    if context.working_location.startswith("s3://"):
        context_config = ContextConfigProcessSample(working_location=context.working_location)
    else:
        context_config = ContextConfigProcessSample(working_location="/mnt")

    series_config = SeriesConfigProcessSample(name=series.name)
    # Encode resolution in tenths of um, e.g. resolution=1.5 -> "R015".
    resolution_key = f"R{round(parameters.resolution * 10):03d}"

    for fov in series.conditions:
        fov_key = fov["key"]

        # Only the config values are needed; region keys are unused here.
        for process_sample in parameters.process_samples.values():
            # Deep copy so per-condition edits do not leak into the shared
            # parameter configuration.
            parameters_config = copy.deepcopy(process_sample)
            parameters_config.key = f"{parameters_config.key % fov_key}_{resolution_key}"

            # Conditions may override which cell ids are processed.
            if "include_ids" in fov:
                parameters_config.include_ids = fov["include_ids"]

            if "exclude_ids" in fov:
                parameters_config.exclude_ids = fov["exclude_ids"]

            config = {
                "context": context_config,
                "series": series_config,
                "parameters": parameters_config,
            }

            process_sample_command = PROCESS_SAMPLE_COMMAND + make_dotlist_from_config(config)
            run_docker_command(parameters.image, process_sample_command, **docker_args)

    # Clean up the Docker volume created for local working locations.
    if "volume" in docker_args:
        remove_docker_volume(docker_args["volume"])

289 

290 

@flow(name="initialize-arcade-simulations_convert-to-arcade")
def run_flow_convert_to_arcade(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig
) -> None:
    """
    Initialize ARCADE simulations subflow for converting to ARCADE.

    Converted processed samples into the ARCADE .CELLS and .LOCATIONS formats,
    along with a basic simulation setup XML file.
    """

    samples_key = make_key(series.name, "samples", "samples.PROCESSED")
    inits_key = make_key(series.name, "inits", "inits.ARCADE")

    resolution = parameters.resolution
    # Encode resolution in tenths of um, e.g. resolution=1.5 -> "R015".
    resolution_key = f"R{round(resolution * 10):03d}"

    if check_key(context.reference_location, series.reference_key):
        reference = load_dataframe(context.reference_location, series.reference_key)

        # Convert reference measurements from um / um^3 into voxel units.
        volume_columns = [column for column in reference.columns if "volume" in column]
        reference[volume_columns] = reference[volume_columns] / resolution**3

        height_columns = [column for column in reference.columns if "height" in column]
        reference[height_columns] = reference[height_columns] / resolution
    else:
        reference = None

    # Convert (mean, std) distribution parameters into voxel units.
    convert = parameters.convert_to_arcade
    volumes = _scale_distributions(convert.volumes, resolution**3)
    heights = _scale_distributions(convert.heights, resolution)
    critical_volumes = _scale_distributions(convert.critical_volumes, resolution**3)
    critical_heights = _scale_distributions(convert.critical_heights, resolution)

    for fov in series.conditions:
        samples = {}

        for region, region_key_template in convert.regions.items():
            region_key = region_key_template % fov["key"]
            key = make_key(
                samples_key, f"{series.name}_{region_key}_{resolution_key}.PROCESSED.csv"
            )
            samples[region] = load_dataframe(context.working_location, key)

        # Conditions may override the default margins.
        margins = fov.get("margins", convert.margins)
        merged_samples = merge_region_samples(samples, margins)
        x, y, z = margins
        key = f"{series.name}_{fov['key']}_X{x:03d}_Y{y:03d}_Z{z:03d}_{resolution_key}"

        # BUGFIX: previously `reference[...]` was evaluated unconditionally and
        # raised TypeError when no reference file existed (reference is None).
        # NOTE(review): assumes convert_to_cells_file accepts a None reference
        # — confirm against arcade_collection; at worst this matches the
        # original failure mode one call later.
        fov_reference = None if reference is None else reference[reference["KEY"] == fov["key"]]

        cells = convert_to_cells_file(
            merged_samples,
            fov_reference,
            volumes,
            heights,
            critical_volumes,
            critical_heights,
            convert.state_thresholds,
        )
        cells_key = make_key(inits_key, f"{key}.CELLS.json")
        save_json(context.working_location, cells_key, cells)

        locations = convert_to_locations_file(merged_samples)
        locations_key = make_key(inits_key, f"{key}.LOCATIONS.json")
        save_json(context.working_location, locations_key, locations)

        setup = generate_setup_file(merged_samples, margins, convert.potts_terms)
        setup_key = make_key(inits_key, f"{key}.xml")
        save_text(context.working_location, setup_key, setup)


def _scale_distributions(
    distributions: dict[str, tuple[float, float]], factor: float
) -> dict[str, tuple[float, float]]:
    """Divide each region's (mean, std) pair by the given unit-conversion factor."""
    return {
        region: (mean / factor, std / factor) for region, (mean, std) in distributions.items()
    }

373 

374 

def get_docker_arguments(context: ContextConfig) -> dict:
    """
    Compile Docker arguments for the given context.

    S3-backed working locations get AWS credentials (when present) forwarded
    as container environment variables; local working locations get a Docker
    volume created for mounting instead.
    """

    if not context.working_location.startswith("s3://"):
        # Local paths are exposed to the container through a named volume.
        return {"volume": create_docker_volume(context.working_location)}

    # Forward only the credentials that are actually set.
    credentials = {
        "AWS_ACCESS_KEY_ID": context.access_key_id,
        "AWS_SECRET_ACCESS_KEY": context.secret_access_key,
    }
    environment = [f"{name}={value}" for name, value in credentials.items() if value is not None]

    return {"environment": environment}