Coverage for src/cell_abm_pipeline/flows/convert_arcade_format.py: 0%

264 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

1""" 

2Workflow for converting ARCADE simulations to other formats. 

3 

4Working location structure: 

5 

6.. code-block:: bash 

7 

8 (name) 

9 ├── converted 

10 │ ├── converted.COLORIZER 

11 │ │ └── (name)_(key)_(seed) 

12 │ │ ├── feature_(index).json 

13 │ │ ├── frame_(index).png 

14 │ │ ├── manifest.json 

15 │ │ ├── outliers.json 

16 │ │ ├── times.json 

17 │ │ └── tracks.json 

18 │ ├── converted.IMAGE 

19 │ │ └── (name)_(key)_(seed)_(chunk)_(chunk).IMAGE.ome.tiff 

20 │ ├── converted.MESH 

21 │ │ └── (name)_(key)_(seed)_(tick)_(id)_(region).MESH.obj 

22 │ ├── converted.PROJECTION 

23 │ │ └── (name)_(key)_(seed)_(tick)_(regions).PROJECTION.png 

24 │ └── converted.SIMULARIUM 

25 │ └── (name)_(key)_(seed).(suffix).simularium 

26 ├── data 

27 │ ├── data.CELLS 

28 │ │ └── (name)_(key)_(seed).CELLS.tar.xz 

29 │ └── data.LOCATIONS 

30 │ └── (name)_(key)_(seed).LOCATIONS.tar.xz 

31 └── results 

32 └── (name)_(key)_(seed).csv 

33 

34Different formats use inputs from **results**, **data.CELLS**, and 

35**data.LOCATIONS**. Formatted data are saved to **converted**. 

36""" 

37 

38from dataclasses import dataclass, field 

39 

40import numpy as np 

41from arcade_collection.convert import ( 

42 convert_to_colorizer, 

43 convert_to_images, 

44 convert_to_meshes, 

45 convert_to_projection, 

46 convert_to_simularium_objects, 

47 convert_to_simularium_shapes, 

48) 

49from io_collection.keys import make_key 

50from io_collection.load import load_dataframe, load_tar 

51from io_collection.save import save_figure, save_image, save_json, save_text 

52from prefect import flow 

53 

54from cell_abm_pipeline.flows.plot_basic_metrics import PHASE_COLORS 

55from cell_abm_pipeline.flows.plot_cell_shapes import REGION_COLORS 

56 

# Names of the supported conversion formats. Each name matches both a branch
# in the main flow and an attribute of the top-level parameter configuration.
FORMATS: list[str] = [
    "colorizer",
    "images",
    "meshes",
    "projections",
    "simularium_shapes",
    "simularium_objects",
]

# Default feature names used by the colorizer parameter configuration.
COLORIZER_FEATURES: list[str] = ["volume", "height"]

70 

71 

@dataclass
class ParametersConfigColorizer:
    """Parameter configuration for convert ARCADE format flow - colorizer."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to colorizer."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to colorizer."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    chunk_size: int = 500
    """Image chunk size."""

    # Copy the module-level default so that mutating one configuration's
    # feature list cannot change the default seen by later configurations.
    features: list[str] = field(default_factory=lambda: list(COLORIZER_FEATURES))
    """List of colorizer features."""

99 

100 

@dataclass
class ParametersConfigImages:
    """Parameter configuration for convert ARCADE format flow - images."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to images."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to images."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    chunk_size: int = 500
    """Image chunk size."""

    binary: bool = False
    """True to generate binary images, False otherwise."""

    separate: bool = False
    """True to generate separate images for each tick, False otherwise."""

125 

126 

@dataclass
class ParametersConfigMeshes:
    """Parameter configuration for convert ARCADE format flow - meshes."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to meshes."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to meshes."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    invert: bool = False
    """True if mesh should have inverted faces, False otherwise."""

145 

146 

@dataclass
class ParametersConfigProjections:
    """Parameter configuration for convert ARCADE format flow - projections."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to projections."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to projections."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    scale: int = 100
    """Size of scale bar (in um)."""

    # Copy the imported color map so that editing one configuration's colors
    # cannot change the shared module-level mapping.
    region_colors: dict[str, str] = field(default_factory=lambda: dict(REGION_COLORS))
    """Colors for each cell region."""

174 

175 

@dataclass
class ParametersConfigSimulariumShapes:
    """Parameter configuration for convert ARCADE format flow - simularium shapes."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to simularium."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to simularium."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    # Copy the imported color map so that editing one configuration's colors
    # cannot change the shared module-level mapping.
    phase_colors: dict[str, str] = field(default_factory=lambda: dict(PHASE_COLORS))
    """Colors for each cell cycle phase."""

    resolution: int = 0
    """Number of voxels represented by a sphere (0 for single sphere per cell)."""

200 

201 

@dataclass
class ParametersConfigSimulariumObjects:
    """Parameter configuration for convert ARCADE format flow - simularium objects."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seeds to use for converting to simularium."""

    frame_spec: tuple[int, int, int] = (0, 1153, 1152)
    """Specification for simulation ticks to use for converting to simularium."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of bounding box."""

    ds: float = 1.0
    """Spatial scaling in units/um."""

    dt: float = 1.0
    """Temporal scaling in hours/tick."""

    # Copy the imported color map so that editing one configuration's colors
    # cannot change the shared module-level mapping.
    phase_colors: dict[str, str] = field(default_factory=lambda: dict(PHASE_COLORS))
    """Colors for each cell cycle phase."""

    url: str = ""
    """URL for object files."""

    group_size: int = 1
    """Mesh group size."""

229 

230 

@dataclass
class ParametersConfig:
    """
    Parameter configuration for convert ARCADE format flow.

    Each subflow configuration is created through a ``default_factory`` so that
    every instance gets its own nested configuration objects. Using a dataclass
    instance directly as a field default is rejected by Python 3.11+ (the
    instances are unhashable) and, on older versions, shares one object between
    all instances.
    """

    # Copy the module-level list so that editing one configuration's formats
    # cannot change the default seen by later configurations.
    formats: list[str] = field(default_factory=lambda: list(FORMATS))
    """List of convert formats."""

    colorizer: ParametersConfigColorizer = field(default_factory=ParametersConfigColorizer)
    """Parameters for colorizer subflow."""

    images: ParametersConfigImages = field(default_factory=ParametersConfigImages)
    """Parameters for images subflow."""

    meshes: ParametersConfigMeshes = field(default_factory=ParametersConfigMeshes)
    """Parameters for meshes subflow."""

    projections: ParametersConfigProjections = field(default_factory=ParametersConfigProjections)
    """Parameters for projections subflow."""

    simularium_shapes: ParametersConfigSimulariumShapes = field(
        default_factory=ParametersConfigSimulariumShapes
    )
    """Parameters for simularium shapes subflow."""

    simularium_objects: ParametersConfigSimulariumObjects = field(
        default_factory=ParametersConfigSimulariumObjects
    )
    """Parameters for simularium objects subflow."""

255 

256 

@dataclass
class ContextConfig:
    """Context configuration for convert ARCADE format flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

263 

264 

@dataclass
class SeriesConfig:
    """Series configuration for convert ARCADE format flow."""

    name: str
    """Name of the simulation series."""

    conditions: list[dict]
    """List of series condition dictionaries (must include unique condition "key")."""

274 

275 

@flow(name="convert-arcade-format")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main convert ARCADE format flow.

    Calls the following subflows, if the format is specified:

    - :py:func:`run_flow_convert_to_colorizer`
    - :py:func:`run_flow_convert_to_images`
    - :py:func:`run_flow_convert_to_meshes`
    - :py:func:`run_flow_convert_to_projections`
    - :py:func:`run_flow_convert_to_simularium_shapes`
    - :py:func:`run_flow_convert_to_simularium_objects`
    """

    # Ordered (format name, subflow) pairs. Each format name is also the name
    # of the attribute on ``parameters`` holding that subflow's configuration.
    subflows = (
        ("colorizer", run_flow_convert_to_colorizer),
        ("images", run_flow_convert_to_images),
        ("meshes", run_flow_convert_to_meshes),
        ("projections", run_flow_convert_to_projections),
        ("simularium_shapes", run_flow_convert_to_simularium_shapes),
        ("simularium_objects", run_flow_convert_to_simularium_objects),
    )

    for format_name, subflow in subflows:
        if format_name in parameters.formats:
            subflow(context, series, getattr(parameters, format_name))

308 

309 

@flow(name="convert-arcade-format_convert-to-colorizer")
def run_flow_convert_to_colorizer(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigColorizer
) -> None:
    """
    Convert ARCADE format subflow for colorizer.

    For each condition key and seed, frame images are generated from the
    **data.LOCATIONS** archive and the colorizer data (manifest, outliers,
    tracks, times, and one entry per feature) are generated from the
    **results** table. Everything is saved to **converted.COLORIZER** in a
    folder named after the series key.
    """

    location = context.working_location
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.COLORIZER")
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(location, make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"))

            frames = convert_to_images(
                series_key,
                tar,
                parameters.frame_spec,
                parameters.regions,
                parameters.box,
                parameters.chunk_size,
                binary=False,
                separate=True,
                flatten=True,
            )

            # Only the image array of each yielded entry is used; frames are
            # numbered by the order in which they are produced.
            for frame_index, (_, _, image, _) in enumerate(frames):
                frame_key = make_key(output_key, series_key, f"frame_{frame_index}.png")
                save_image(location, frame_key, image)

            results = load_dataframe(
                location, make_key(series.name, "results", f"{series_key}.csv")
            )

            colorizer = convert_to_colorizer(
                results,
                parameters.features,
                parameters.frame_spec,
                parameters.ds,
                parameters.dt,
                parameters.regions,
            )

            # Fixed-name outputs, saved in the same order as listed here.
            for entry in ("manifest", "outliers", "tracks", "times"):
                entry_key = make_key(output_key, series_key, f"{entry}.json")
                save_json(location, entry_key, colorizer[entry])

            # Feature files are named by position in the feature list, while
            # the data are looked up by feature name.
            for feature_index, feature in enumerate(parameters.features):
                feature_key = make_key(output_key, series_key, f"feature_{feature_index}.json")
                save_json(location, feature_key, colorizer[feature])

370 

371 

@flow(name="convert-arcade-format_convert-to-images")
def run_flow_convert_to_images(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigImages
) -> None:
    """
    Convert ARCADE format subflow for images.

    For each condition key and seed, image chunks are generated from the
    **data.LOCATIONS** archive and saved to **converted.IMAGE** as OME-TIFF
    files. The frame number is included in the file name only when the
    converter reports one for the chunk.
    """

    location = context.working_location
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.IMAGE")
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(location, make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"))

            chunks = convert_to_images(
                series_key,
                tar,
                parameters.frame_spec,
                parameters.regions,
                parameters.box,
                parameters.chunk_size,
                binary=parameters.binary,
                separate=parameters.separate,
                flatten=False,
            )

            for i, j, chunk, frame in chunks:
                chunk_name = f"{i:02d}_{j:02d}.IMAGE.ome.tiff"
                prefix = series_key if frame is None else f"{series_key}_{frame:06d}"
                save_image(location, make_key(output_key, f"{prefix}_{chunk_name}"), chunk)

410 

411 

@flow(name="convert-arcade-format_convert-to-meshes")
def run_flow_convert_to_meshes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigMeshes
) -> None:
    """
    Convert ARCADE format subflow for meshes.

    For each condition key and seed, meshes are generated from the
    **data.LOCATIONS** archive and saved as text to **converted.MESH**, one
    file per frame, cell id, and region.
    """

    location = context.working_location
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.MESH")
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(location, make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"))

            meshes = convert_to_meshes(
                series_key,
                tar,
                parameters.frame_spec,
                parameters.regions,
                parameters.box,
                parameters.invert,
            )

            for frame, cell_id, region, mesh in meshes:
                mesh_name = f"{series_key}_{frame:06d}_{cell_id:06d}_{region}.MESH.obj"
                save_text(location, make_key(output_key, mesh_name), mesh)

443 

444 

@flow(name="convert-arcade-format_convert-to-projections")
def run_flow_convert_to_projections(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigProjections
) -> None:
    """
    Convert ARCADE format subflow for projections.

    For each condition key and seed, one projection figure is generated from
    the **data.LOCATIONS** archive for every frame in
    ``np.arange(*parameters.frame_spec)`` and saved to
    **converted.PROJECTION**. The sorted region names are joined with
    underscores to form part of the file name.
    """

    location = context.working_location
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.PROJECTION")
    region_key = "_".join(sorted(parameters.regions))
    condition_keys = [condition["key"] for condition in series.conditions]

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            tar = load_tar(location, make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"))

            for frame in np.arange(*parameters.frame_spec):
                figure = convert_to_projection(
                    series_key,
                    tar,
                    frame,
                    parameters.regions,
                    parameters.box,
                    parameters.ds,
                    parameters.dt,
                    parameters.scale,
                    parameters.region_colors,
                )

                figure_name = f"{series_key}_{frame:06d}_{region_key}.PROJECTION.png"
                figure_key = make_key(output_key, figure_name)
                save_figure(location, figure_key, figure, bbox_inches="tight")

482 

483 

@flow(name="convert-arcade-format_convert-to-simularium-shapes")
def run_flow_convert_to_simularium_shapes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigSimulariumShapes
) -> None:
    """
    Convert ARCADE format subflow for Simularium with shapes.

    For each condition key and seed, the **data.CELLS** and **data.LOCATIONS**
    archives are converted and the result is saved as text to
    **converted.SIMULARIUM**. The file name carries a ``SHAPES`` suffix that
    includes the configured resolution.
    """

    location = context.working_location
    cells_key = make_key(series.name, "data", "data.CELLS")
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.SIMULARIUM")
    condition_keys = [condition["key"] for condition in series.conditions]

    suffix = f"SHAPES{parameters.resolution}"

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            cells_tar = load_tar(location, make_key(cells_key, f"{series_key}.CELLS.tar.xz"))
            locations_tar = load_tar(
                location, make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz")
            )

            simularium = convert_to_simularium_shapes(
                series_key,
                "potts",
                {"cells": cells_tar, "locations": locations_tar},
                parameters.frame_spec,
                parameters.box,
                parameters.ds,
                # NOTE(review): ds is passed a second time here; presumably the
                # converter takes a separate z-scaling argument -- confirm.
                parameters.ds,
                parameters.dt,
                parameters.phase_colors,
                parameters.resolution,
            )

            simularium_key = make_key(output_key, f"{series_key}.{suffix}.simularium")
            save_text(location, simularium_key, simularium)

522 

523 

@flow(name="convert-arcade-format_convert-to-simularium-objects")
def run_flow_convert_to_simularium_objects(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigSimulariumObjects
) -> None:
    """
    Convert ARCADE format subflow for Simularium with objects.

    For each condition key and seed, meshes for the DEFAULT and NUCLEUS regions
    are generated from the **data.LOCATIONS** archive and saved to a folder in
    **converted.SIMULARIUM**. A Simularium file that refers to that folder
    (under the configured URL) is then saved alongside it. Both names carry an
    ``OBJECTS`` suffix that includes the configured group size.
    """

    location = context.working_location
    locations_key = make_key(series.name, "data", "data.LOCATIONS")
    output_key = make_key(series.name, "converted", "converted.SIMULARIUM")
    condition_keys = [condition["key"] for condition in series.conditions]

    suffix = f"OBJECTS{parameters.group_size}"
    regions = ["DEFAULT", "NUCLEUS"]
    invert = {"DEFAULT": True, "NUCLEUS": False}
    column_names = {"TICK": "FRAME", "PHASE": "CATEGORY"}

    for condition_key in condition_keys:
        for seed in parameters.seeds:
            series_key = f"{series.name}_{condition_key}_{seed:04d}"

            results = load_dataframe(
                location, make_key(series.name, "results", f"{series_key}.csv")
            )
            tar = load_tar(location, make_key(locations_key, f"{series_key}.LOCATIONS.tar.xz"))

            # The cell phase at each tick serves as the category for each cell id.
            categories = results[["TICK", "PHASE", "ID"]].rename(columns=column_names)

            meshes = convert_to_meshes(
                series_key,
                tar,
                parameters.frame_spec,
                regions,
                parameters.box,
                invert,
                parameters.group_size,
                categories,
            )

            mesh_folder_key = make_key(output_key, f"{series_key}.{suffix}")

            for frame, index, region, mesh in meshes:
                mesh_name = f"{frame:06d}_{region}_{index:03d}.MESH.obj"
                save_text(location, make_key(mesh_folder_key, mesh_name), mesh)

            simularium = convert_to_simularium_objects(
                series_key,
                "potts",
                categories,
                parameters.frame_spec,
                regions,
                parameters.box,
                parameters.ds,
                # NOTE(review): ds is passed a second time here; presumably the
                # converter takes a separate z-scaling argument -- confirm.
                parameters.ds,
                parameters.dt,
                parameters.phase_colors,
                parameters.group_size,
                make_key(parameters.url, mesh_folder_key),
            )

            simularium_key = make_key(output_key, f"{series_key}.{suffix}.simularium")
            save_text(location, simularium_key, simularium)