Coverage for src/cell_abm_pipeline/flows/group_cell_shapes.py: 0%

537 statements  

coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

1""" 

2Workflow for grouping cell shapes. 

3 

4Working location structure: 

5 

6.. code-block:: bash 

7 

8 (name) 

9 ├── analysis 

10 │ ├── analysis.CELL_SHAPES_DATA 

11 │ │ └── (name)_(key).CELL_SHAPES_DATA.csv 

12 │ └── analysis.CELL_SHAPES_MODELS 

13 │ └── (name)_(key).CELL_SHAPES_MODELS.pkl 

14 ├── data 

15 │ └── data.LOCATIONS 

16 │ └── (name)_(key)_(seed).LOCATIONS.tar.xz 

17 └── groups 

18 └── groups.CELL_SHAPES 

19 ├── (name).feature_components.csv 

20 ├── (name).feature_correlations.(key).(region).csv 

21 ├── (name).feature_correlations.(key).(mode).(property).(region).csv 

22 ├── (name).feature_distributions.(feature).json 

23 ├── (name).mode_correlations.csv 

24 ├── (name).population_counts.(time).csv 

25 ├── (name).population_stats.json 

26 ├── (name).shape_average.(key).(projection).json 

27 ├── (name).shape_contours.(key).(seed).(time).(region).(projection).json 

28 ├── (name).shape_errors.json 

29 ├── (name).shape_modes.(key).(region).(mode).(projection).json 

30 ├── (name).shape_samples.json 

31 └── (name).variance_explained.csv 

32 

33Different groups use inputs from **data.LOCATIONS**, 

34**analysis.CELL_SHAPES_DATA**, and **analysis.CELL_SHAPES_MODELS**. Grouped data 

35are saved to **groups.CELL_SHAPES**. 

36 

37Different groups can be visualized using the corresponding plotting workflow or 

38loaded into alternative tools. 

39""" 


from dataclasses import dataclass, field
from datetime import timedelta
from typing import Optional, Union

import numpy as np
import pandas as pd
from abm_shape_collection import (
    construct_mesh_from_array,
    construct_mesh_from_coeffs,
    extract_mesh_projections,
    extract_mesh_wireframe,
    extract_shape_modes,
    extract_voxel_contours,
    make_voxels_array,
)
from arcade_collection.output import extract_tick_json, get_location_voxels
from arcade_collection.output.convert_model_units import (
    estimate_spatial_resolution,
    estimate_temporal_resolution,
)
from io_collection.keys import make_key
from io_collection.load import load_dataframe, load_pickle, load_tar
from io_collection.save import save_dataframe, save_json
from prefect import flow, get_run_logger
from prefect.tasks import task_input_hash
from scipy.spatial import ConvexHull, KDTree
from scipy.stats import pearsonr
from sklearn.decomposition import PCA

from cell_abm_pipeline.flows.analyze_cell_shapes import PCA_COMPONENTS
from cell_abm_pipeline.flows.calculate_coefficients import COEFFICIENT_ORDER
from cell_abm_pipeline.tasks import bin_to_hex, calculate_data_bins, check_data_bounds


OPTIONS = {
    "cache_result_in_memory": False,
    "cache_key_fn": task_input_hash,
    "cache_expiration": timedelta(hours=12),
}

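# These options are applied to IO tasks throughout this module via Prefect's
# `Task.with_options`, so repeated loads with identical inputs reuse cached
# results for 12 hours. A minimal sketch of the pattern, using the imports
# above (the `location` and `key` values are illustrative):
#
#     cached_load = load_dataframe.with_options(**OPTIONS)
#     data = cached_load(location, key)
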

GROUPS: list[str] = [
    "feature_components",
    "feature_correlations",
    "feature_distributions",
    "mode_correlations",
    "population_counts",
    "population_stats",
    "shape_average",
    "shape_contours",
    "shape_errors",
    "shape_modes",
    "shape_samples",
    "variance_explained",
]

COMPONENT_FEATURES: list[str] = [
    "volume",
    "height",
    "area",
    "axis_major_length",
    "axis_minor_length",
    "eccentricity",
    "orientation",
    "perimeter",
    "extent",
    "solidity",
]

CORRELATION_PROPERTIES: list[str] = [
    "volume",
    "height",
    "area",
    "axis_major_length",
    "axis_minor_length",
    "eccentricity",
    "perimeter",
]

DISTRIBUTION_PROPERTIES: list[str] = [
    "volume",
    "height",
]

PROJECTIONS: list[str] = [
    "top",
    "side1",
    "side2",
]

LIMITS: dict[str, list] = {
    "volume.DEFAULT": [500, 4000],
    "volume.NUCLEUS": [0, 1500],
    "height.DEFAULT": [0, 20],
    "height.NUCLEUS": [0, 20],
    "area.DEFAULT": [0, 1000],
    "area.NUCLEUS": [0, 250],
    "axis_major_length.DEFAULT": [0, 100],
    "axis_major_length.NUCLEUS": [0, 50],
    "axis_minor_length.DEFAULT": [0, 50],
    "axis_minor_length.NUCLEUS": [0, 20],
    "eccentricity.DEFAULT": [0, 1],
    "eccentricity.NUCLEUS": [0, 1],
    "perimeter.DEFAULT": [0, 250],
    "perimeter.NUCLEUS": [0, 100],
    "PC1": [-60, 60],
    "PC2": [-50, 50],
    "PC3": [-50, 50],
    "PC4": [-50, 50],
    "PC5": [-40, 40],
    "PC6": [-40, 40],
    "PC7": [-50, 50],
    "PC8": [-50, 50],
}

BOUNDS: dict[str, list] = {
    "volume.DEFAULT": [0, 6000],
    "volume.NUCLEUS": [0, 2000],
    "height.DEFAULT": [0, 21],
    "height.NUCLEUS": [0, 21],
    "area.DEFAULT": [0, 2500],
    "area.NUCLEUS": [0, 1000],
    "perimeter.DEFAULT": [0, 2000],
    "perimeter.NUCLEUS": [0, 700],
    "axis_major_length.DEFAULT": [0, 300],
    "axis_major_length.NUCLEUS": [0, 150],
    "axis_minor_length.DEFAULT": [0, 150],
    "axis_minor_length.NUCLEUS": [0, 100],
    "eccentricity.DEFAULT": [0, 1],
    "eccentricity.NUCLEUS": [0, 1],
    "orientation.DEFAULT": [-2, 2],
    "orientation.NUCLEUS": [-2, 2],
    "extent.DEFAULT": [0, 1],
    "extent.NUCLEUS": [0, 1],
    "solidity.DEFAULT": [0, 1],
    "solidity.NUCLEUS": [0, 1],
    "PC1": [-50, 50],
    "PC2": [-50, 50],
    "PC3": [-50, 50],
    "PC4": [-50, 50],
    "PC5": [-50, 50],
    "PC6": [-50, 50],
    "PC7": [-50, 50],
    "PC8": [-50, 50],
}

BANDWIDTH: dict[str, float] = {
    "volume.DEFAULT": 100,
    "volume.NUCLEUS": 50,
    "height.DEFAULT": 1,
    "height.NUCLEUS": 1,
    "area.DEFAULT": 50,
    "area.NUCLEUS": 10,
    "perimeter.DEFAULT": 50,
    "perimeter.NUCLEUS": 10,
    "axis_major_length.DEFAULT": 10,
    "axis_major_length.NUCLEUS": 5,
    "axis_minor_length.DEFAULT": 5,
    "axis_minor_length.NUCLEUS": 2,
    "eccentricity.DEFAULT": 0.01,
    "eccentricity.NUCLEUS": 0.01,
    "orientation.DEFAULT": 0.05,
    "orientation.NUCLEUS": 0.05,
    "extent.DEFAULT": 0.01,
    "extent.NUCLEUS": 0.01,
    "solidity.DEFAULT": 0.01,
    "solidity.NUCLEUS": 0.01,
    "PC1": 5,
    "PC2": 5,
    "PC3": 5,
    "PC4": 5,
    "PC5": 5,
    "PC6": 5,
    "PC7": 5,
    "PC8": 5,
}



@dataclass
class ParametersConfigFeatureComponents:
    """Parameter configuration for group cell shapes subflow - feature components."""

    features: list[str] = field(default_factory=lambda: COMPONENT_FEATURES)
    """List of shape features."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components."""

    reference_metrics: Optional[str] = None
    """Full key for reference metrics data."""

    reference_properties: Optional[str] = None
    """Full key for reference properties data."""



@dataclass
class ParametersConfigFeatureCorrelations:
    """Parameter configuration for group cell shapes subflow - feature correlations."""

    properties: list[str] = field(default_factory=lambda: CORRELATION_PROPERTIES)
    """List of shape properties."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components (i.e. shape modes)."""

    include_bins: bool = False
    """True if correlations are binned, False otherwise."""

    limits: dict[str, list] = field(default_factory=lambda: LIMITS)
    """Limits for scaling feature correlations bins."""



@dataclass
class ParametersConfigFeatureDistributions:
    """Parameter configuration for group cell shapes subflow - feature distributions."""

    reference_metrics: Optional[str] = None
    """Full key for reference metrics data."""

    reference_properties: Optional[str] = None
    """Full key for reference properties data."""

    reference_coefficients: Optional[str] = None
    """Full key for reference coefficients data."""

    reference_model: Optional[str] = None
    """Full key for reference PCA model."""

    properties: list[str] = field(default_factory=lambda: DISTRIBUTION_PROPERTIES)
    """List of shape properties."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components (i.e. shape modes)."""

    bounds: dict[str, list] = field(default_factory=lambda: BOUNDS)
    """Bounds for feature distributions."""

    bandwidth: dict[str, float] = field(default_factory=lambda: BANDWIDTH)
    """Bandwidths for feature distributions."""



@dataclass
class ParametersConfigModeCorrelations:
    """Parameter configuration for group cell shapes subflow - mode correlations."""

    reference_model: Optional[str] = None
    """Full key for reference PCA model."""

    reference_data: Optional[str] = None
    """Full key for reference coefficients data."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components (i.e. shape modes)."""



@dataclass
class ParametersConfigPopulationCounts:
    """Parameter configuration for group cell shapes subflow - population counts."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    seeds: list[int] = field(default_factory=lambda: [0])
    """Simulation seed(s) to use for grouping population counts."""

    time: int = 0
    """Simulation time (in hours) to use for grouping population counts."""



@dataclass
class ParametersConfigPopulationStats:
    """Parameter configuration for group cell shapes subflow - population stats."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""



@dataclass
class ParametersConfigShapeAverage:
    """Parameter configuration for group cell shapes subflow - shape average."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components (i.e. shape modes)."""

    order: int = COEFFICIENT_ORDER
    """Order of the spherical harmonics coefficient parametrization."""

    scale: float = 1
    """Scaling for spherical harmonics reconstruction mesh."""

    projections: list[str] = field(default_factory=lambda: PROJECTIONS)
    """List of shape projections."""



@dataclass
class ParametersConfigShapeContours:
    """Parameter configuration for group cell shapes subflow - shape contours."""

    regions: list[Optional[str]] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    seed: int = 0
    """Simulation random seed to use for grouping shape contours."""

    time: int = 0
    """Simulation time (in hours) to use for grouping shape contours."""

    ds: Optional[float] = None
    """Spatial scaling in units/um."""

    dt: Optional[float] = None
    """Temporal scaling in hours/tick."""

    projection: str = "top"
    """Selected shape projection."""

    box: tuple[int, int, int] = field(default_factory=lambda: (1, 1, 1))
    """Size of projection bounding box."""

    slice_index: Optional[int] = None
    """Slice index along the shape projection axis."""



@dataclass
class ParametersConfigShapeErrors:
    """Parameter configuration for group cell shapes subflow - shape errors."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""



@dataclass
class ParametersConfigShapeModes:
    """Parameter configuration for group cell shapes subflow - shape modes."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components (i.e. shape modes)."""

    order: int = COEFFICIENT_ORDER
    """Order of the spherical harmonics coefficient parametrization."""

    delta: float = 0.5
    """Increment for shape mode map points."""

    projections: list[str] = field(default_factory=lambda: PROJECTIONS)
    """List of shape projections."""



@dataclass
class ParametersConfigShapeSamples:
    """Parameter configuration for group cell shapes subflow - shape samples."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    seed: int = 0
    """Simulation random seed to use for grouping shape samples."""

    tick: int = 0
    """Simulation tick to use for grouping shape samples."""

    indices: list[int] = field(default_factory=lambda: [0])
    """Cell indices for shape samples."""



@dataclass
class ParametersConfigVarianceExplained:
    """Parameter configuration for group cell shapes subflow - variance explained."""

    regions: list[str] = field(default_factory=lambda: ["DEFAULT"])
    """List of subcellular regions."""

    components: int = PCA_COMPONENTS
    """Number of principal components (i.e. shape modes)."""



@dataclass
class ParametersConfig:
    """Parameter configuration for group cell shapes flow."""

    groups: list[str] = field(default_factory=lambda: GROUPS)
    """List of cell shapes groups."""

    feature_components: ParametersConfigFeatureComponents = ParametersConfigFeatureComponents()
    """Parameters for group feature components subflow."""

    feature_correlations: ParametersConfigFeatureCorrelations = (
        ParametersConfigFeatureCorrelations()
    )
    """Parameters for group feature correlations subflow."""

    feature_distributions: ParametersConfigFeatureDistributions = (
        ParametersConfigFeatureDistributions()
    )
    """Parameters for group feature distributions subflow."""

    mode_correlations: ParametersConfigModeCorrelations = ParametersConfigModeCorrelations()
    """Parameters for group mode correlations subflow."""

    population_counts: ParametersConfigPopulationCounts = ParametersConfigPopulationCounts()
    """Parameters for group population counts subflow."""

    population_stats: ParametersConfigPopulationStats = ParametersConfigPopulationStats()
    """Parameters for group population stats subflow."""

    shape_average: ParametersConfigShapeAverage = ParametersConfigShapeAverage()
    """Parameters for group shape average subflow."""

    shape_contours: ParametersConfigShapeContours = ParametersConfigShapeContours()
    """Parameters for group shape contours subflow."""

    shape_errors: ParametersConfigShapeErrors = ParametersConfigShapeErrors()
    """Parameters for group shape errors subflow."""

    shape_modes: ParametersConfigShapeModes = ParametersConfigShapeModes()
    """Parameters for group shape modes subflow."""

    shape_samples: ParametersConfigShapeSamples = ParametersConfigShapeSamples()
    """Parameters for group shape samples subflow."""

    variance_explained: ParametersConfigVarianceExplained = ParametersConfigVarianceExplained()
    """Parameters for group variance explained subflow."""



@dataclass
class ContextConfig:
    """Context configuration for group cell shapes flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""



@dataclass
class SeriesConfig:
    """Series configuration for group cell shapes flow."""

    name: str
    """Name of the simulation series."""

    conditions: list[dict]
    """List of series condition dictionaries (must include unique condition "key")."""



@flow(name="group-cell-shapes")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main group cell shapes flow.

    Calls the following subflows, if the group is specified:

    - :py:func:`run_flow_group_feature_components`
    - :py:func:`run_flow_group_feature_correlations`
    - :py:func:`run_flow_group_feature_distributions`
    - :py:func:`run_flow_group_mode_correlations`
    - :py:func:`run_flow_group_population_counts`
    - :py:func:`run_flow_group_population_stats`
    - :py:func:`run_flow_group_shape_average`
    - :py:func:`run_flow_group_shape_contours`
    - :py:func:`run_flow_group_shape_errors`
    - :py:func:`run_flow_group_shape_modes`
    - :py:func:`run_flow_group_shape_samples`
    - :py:func:`run_flow_group_variance_explained`
    """

    if "feature_components" in parameters.groups:
        run_flow_group_feature_components(context, series, parameters.feature_components)

    if "feature_correlations" in parameters.groups:
        run_flow_group_feature_correlations(context, series, parameters.feature_correlations)

    if "feature_distributions" in parameters.groups:
        run_flow_group_feature_distributions(context, series, parameters.feature_distributions)

    if "mode_correlations" in parameters.groups:
        run_flow_group_mode_correlations(context, series, parameters.mode_correlations)

    if "population_counts" in parameters.groups:
        run_flow_group_population_counts(context, series, parameters.population_counts)

    if "population_stats" in parameters.groups:
        run_flow_group_population_stats(context, series, parameters.population_stats)

    if "shape_average" in parameters.groups:
        run_flow_group_shape_average(context, series, parameters.shape_average)

    if "shape_contours" in parameters.groups:
        run_flow_group_shape_contours(context, series, parameters.shape_contours)

    if "shape_errors" in parameters.groups:
        run_flow_group_shape_errors(context, series, parameters.shape_errors)

    if "shape_modes" in parameters.groups:
        run_flow_group_shape_modes(context, series, parameters.shape_modes)

    if "shape_samples" in parameters.groups:
        run_flow_group_shape_samples(context, series, parameters.shape_samples)

    if "variance_explained" in parameters.groups:
        run_flow_group_variance_explained(context, series, parameters.variance_explained)


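# Illustrative invocation of the main flow, using the config dataclasses
# defined above; the location, series name, and condition keys here are
# hypothetical values, not defaults from this pipeline:
#
#     context = ContextConfig(working_location="s3://bucket")
#     series = SeriesConfig(name="SERIES", conditions=[{"key": "A"}, {"key": "B"}])
#     parameters = ParametersConfig(groups=["population_counts", "variance_explained"])
#     run_flow(context, series, parameters)
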

@flow(name="group-cell-shapes_group-feature-components")
def run_flow_group_feature_components(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigFeatureComponents
) -> None:
    """Group cell shapes subflow for feature components."""

    analysis_key = make_key(series.name, "analysis", "analysis.CELL_SHAPES_DATA")
    group_key = make_key(series.name, "groups", "groups.CELL_SHAPES")

    # Get feature columns.
    columns = [
        f"{feature}.{region}" if region != "DEFAULT" else feature
        for region in parameters.regions
        for feature in parameters.features
    ]

    # Load data.
    data_key = make_key(analysis_key, f"{series.name}.CELL_SHAPES_DATA.csv")
    data = load_dataframe.with_options(**OPTIONS)(context.working_location, data_key)

    # Fit model.
    pca_data = data[columns]
    pca_data_mean = pca_data.mean(axis=0)
    pca_data_std = pca_data.std(axis=0)
    pca_data_zscore = (pca_data - pca_data_mean) / pca_data_std
    pca = PCA(n_components=parameters.components)
    pca = pca.fit(pca_data_zscore)
    transform = pca.transform(pca_data_zscore)

    # Create output data.
    feature_components = data[["KEY"]].copy()
    feature_components.rename(columns={"KEY": "key"}, inplace=True)
    for comp in range(parameters.components):
        feature_components[f"component_{comp + 1}"] = transform[:, comp]

    # Save dataframe.
    save_dataframe(
        context.working_location,
        make_key(group_key, f"{series.name}.feature_components.csv"),
        feature_components,
        index=False,
    )

    # Get reference data convex hull.
    if parameters.reference_metrics is not None and parameters.reference_properties is not None:
        index_columns = ["KEY", "ID", "SEED", "TICK"]
        reference_metrics = load_dataframe.with_options(**OPTIONS)(
            context.working_location, parameters.reference_metrics
        )
        reference_properties = load_dataframe.with_options(**OPTIONS)(
            context.working_location, parameters.reference_properties
        )

        reference_metrics.set_index(index_columns, inplace=True)
        reference_properties.set_index(index_columns, inplace=True)

        reference = reference_metrics.join(reference_properties, on=index_columns).reset_index()
        reference_zscore = (reference[columns] - pca_data_mean) / pca_data_std
        reference_transform = pca.transform(reference_zscore)

        hull = ConvexHull(reference_transform)
        points = pd.DataFrame(reference_transform[hull.vertices, :], columns=["x", "y"])

        save_dataframe(
            context.working_location,
            make_key(group_key, f"{series.name}.feature_components.REFERENCE.csv"),
            points,
            index=False,
        )


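# Minimal sketch of the z-score, PCA, and convex hull steps used above, on toy
# data; array shapes and random values are illustrative only.
#
#     import numpy as np
#     from scipy.spatial import ConvexHull
#     from sklearn.decomposition import PCA
#
#     values = np.random.default_rng(0).normal(size=(100, 4))
#     zscored = (values - values.mean(axis=0)) / values.std(axis=0)
#     transform = PCA(n_components=2).fit_transform(zscored)
#     hull = ConvexHull(transform)  # hull.vertices indexes the boundary points
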

@flow(name="group-cell-shapes_group-feature-correlations")
def run_flow_group_feature_correlations(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigFeatureCorrelations
) -> None:
    """Group cell shapes subflow for feature correlations."""

    analysis_shapes_key = make_key(series.name, "analysis", "analysis.SHAPES")
    analysis_pca_key = make_key(series.name, "analysis", "analysis.PCA")
    group_key = make_key(series.name, "groups", "groups.SHAPES")
    region_key = "_".join(sorted(parameters.regions))
    keys = [condition["key"] for condition in series.conditions]

    for key in keys:
        feature_key = f"{series.name}.feature_correlations.{key}"
        series_key = f"{series.name}_{key}_{region_key}"

        # Load model.
        model_key = make_key(analysis_pca_key, f"{series_key}.PCA.pkl")
        model = load_pickle.with_options(**OPTIONS)(context.working_location, model_key)

        # Load dataframe.
        dataframe_key = make_key(analysis_shapes_key, f"{series_key}.SHAPES.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)

        # Transform data into shape mode space.
        columns = data.filter(like="shcoeffs").columns
        transform = model.transform(data[columns].values)

        for region in parameters.regions:
            correlations: list[dict[str, Union[str, float]]] = []

            for component in range(parameters.components):
                mode_key = f"PC{component + 1}"
                component_data = transform[:, component]

                for prop in parameters.properties:
                    prop_key = prop.upper()
                    prop_data = data[f"{prop}.{region}".replace(".DEFAULT", "")]

                    slope, intercept = np.polyfit(component_data, prop_data, 1)

                    correlations.append(
                        {
                            "mode": mode_key,
                            "property": prop_key,
                            "correlation": pearsonr(prop_data, component_data).statistic,
                            "correlation_symmetric": pearsonr(
                                prop_data, abs(component_data)
                            ).statistic,
                            "slope": slope,
                            "intercept": intercept,
                        }
                    )

                    if not parameters.include_bins:
                        continue

                    prop_limits = parameters.limits[f"{prop}.{region}"]
                    mode_limits = parameters.limits[mode_key]

                    bins = bin_to_hex(
                        component_data,
                        prop_data,
                        np.ones(len(prop_data)),
                        scale=0.025,
                        limits=(mode_limits[0], mode_limits[1], prop_limits[0], prop_limits[1]),
                    )
                    bins_df = pd.DataFrame(
                        [[x, y, np.sum(v)] for (x, y), v in bins.items()], columns=["x", "y", "v"]
                    )

                    save_dataframe(
                        context.working_location,
                        make_key(group_key, f"{feature_key}.{mode_key}.{prop_key}.{region}.csv"),
                        bins_df,
                        index=False,
                    )

            save_dataframe(
                context.working_location,
                make_key(group_key, f"{feature_key}.{region}.csv"),
                pd.DataFrame(correlations),
                index=False,
            )


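# Minimal sketch of the per-mode statistics computed above: a Pearson
# correlation plus a degree-1 least-squares fit, shown here on toy arrays.
#
#     import numpy as np
#     from scipy.stats import pearsonr
#
#     x = np.array([0.0, 1.0, 2.0, 3.0])
#     y = np.array([1.0, 2.9, 5.1, 7.0])
#     slope, intercept = np.polyfit(x, y, 1)
#     correlation = pearsonr(y, x).statistic  # symmetric in its arguments
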

@flow(name="group-cell-shapes_group-feature-distributions")
def run_flow_group_feature_distributions(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigFeatureDistributions
) -> None:
    """Group cell shapes subflow for feature distributions."""

    analysis_key = make_key(series.name, "analysis", "analysis.CELL_SHAPES_DATA")
    group_key = make_key(series.name, "groups", "groups.CELL_SHAPES")

    keys = [condition["key"] for condition in series.conditions]
    superkeys = {key_group for key in keys for key_group in key.split("_")}

    features = [
        (f"{prop}.{region}", False)
        for prop in parameters.properties
        for region in parameters.regions
    ]

    # Initialize reference data so the filtering checks below are safe when
    # only one reference source is configured.
    ref_metrics = None
    ref_props = None

    if parameters.reference_metrics is not None:
        ref_metrics = load_dataframe.with_options(**OPTIONS)(
            context.working_location, parameters.reference_metrics
        )
        features.extend(
            [
                (feature, True)
                for feature, _ in features
                if feature.replace(".DEFAULT", "") in ref_metrics.columns
            ]
        )

    if parameters.reference_properties is not None:
        ref_props = load_dataframe.with_options(**OPTIONS)(
            context.working_location, parameters.reference_properties
        )
        features.extend(
            [
                (feature, True)
                for feature, _ in features
                if feature.replace(".DEFAULT", "") in ref_props.columns
            ]
        )

    if parameters.reference_model is not None and parameters.reference_coefficients is not None:
        ref_coeffs = load_dataframe.with_options(**OPTIONS)(
            context.working_location, parameters.reference_coefficients, nrows=1
        )
        ref_model = load_pickle.with_options(**OPTIONS)(
            context.working_location, parameters.reference_model
        )
        features.extend(
            [(f"PC{component + 1}", False) for component in range(parameters.components)]
        )

    distribution_bins: dict[tuple[str, bool], dict] = {feature: {} for feature in features}
    distribution_means: dict[tuple[str, bool], dict] = {feature: {} for feature in features}
    distribution_stdevs: dict[tuple[str, bool], dict] = {feature: {} for feature in features}
    distribution_mins: dict[tuple[str, bool], dict] = {feature: {} for feature in features}
    distribution_maxs: dict[tuple[str, bool], dict] = {feature: {} for feature in features}

    for key in superkeys:
        dataframe_key = make_key(analysis_key, f"{series.name}_{key}.CELL_SHAPES_DATA.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)

        if parameters.reference_model is not None:
            transform = ref_model.transform(
                data[ref_coeffs.filter(like="shcoeffs").columns].values
            )
            for component in range(parameters.components):
                data[f"PC{component + 1}"] = transform[:, component]

        for feature, filtered in features:
            feature_column = feature.replace(".DEFAULT", "")
            values = data[feature_column].values

            if filtered:
                if ref_metrics is not None and feature_column in ref_metrics.columns:
                    ref_max = ref_metrics[feature_column].max()
                    ref_min = ref_metrics[feature_column].min()
                    values = values[(values >= ref_min) & (values <= ref_max)]

                if ref_props is not None and feature_column in ref_props.columns:
                    ref_max = ref_props[feature_column].max()
                    ref_min = ref_props[feature_column].min()
                    values = values[(values >= ref_min) & (values <= ref_max)]

            bounds = (parameters.bounds[feature][0], parameters.bounds[feature][1])
            bandwidth = parameters.bandwidth[feature]

            valid = check_data_bounds(values, bounds, f"[ {key} ] feature [ {feature} ]")

            if not valid:
                continue

            distribution_means[(feature, filtered)][key] = np.mean(values)
            distribution_stdevs[(feature, filtered)][key] = np.std(values, ddof=1)
            distribution_bins[(feature, filtered)][key] = calculate_data_bins(
                values, bounds, bandwidth
            )
            distribution_mins[(feature, filtered)][key] = np.min(values)
            distribution_maxs[(feature, filtered)][key] = np.max(values)

    for (feature, filtered), distribution in distribution_bins.items():
        distribution["*"] = {
            "bandwidth": parameters.bandwidth[feature],
            "means": distribution_means[(feature, filtered)],
            "stdevs": distribution_stdevs[(feature, filtered)],
            "mins": distribution_mins[(feature, filtered)],
            "maxs": distribution_maxs[(feature, filtered)],
        }

        feature_key = f"{feature.upper()}{'.FILTERED' if filtered else ''}"
        save_json(
            context.working_location,
            make_key(group_key, f"{series.name}.feature_distributions.{feature_key}.json"),
            distribution,
        )


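# The exact binning is delegated to `calculate_data_bins` (project-specific);
# a rough stand-in for one feature, assuming fixed bounds and the bandwidth
# used as a bin width, might look like:
#
#     import numpy as np
#
#     values = np.array([510.0, 620.0, 585.0, 710.0])
#     bounds, bandwidth = (0, 6000), 100
#     edges = np.arange(bounds[0], bounds[1] + bandwidth, bandwidth)
#     counts, _ = np.histogram(values, bins=edges)
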

@flow(name="group-cell-shapes_group-mode-correlations")
def run_flow_group_mode_correlations(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigModeCorrelations
) -> None:
    """Group cell shapes subflow for mode correlations."""

    analysis_shapes_key = make_key(series.name, "analysis", "analysis.SHAPES")
    analysis_pca_key = make_key(series.name, "analysis", "analysis.PCA")
    group_key = make_key(series.name, "groups", "groups.SHAPES")
    region_key = "_".join(sorted(parameters.regions))
    keys = [condition["key"] for condition in series.conditions]

    all_models = {}
    all_data = {}

    for key in keys:
        series_key = f"{series.name}_{key}_{region_key}"

        # Load model.
        model_key = make_key(analysis_pca_key, f"{series_key}.PCA.pkl")
        model = load_pickle.with_options(**OPTIONS)(context.working_location, model_key)
        all_models[key] = model

        # Load dataframe.
        dataframe_key = make_key(analysis_shapes_key, f"{series_key}.SHAPES.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)
        all_data[key] = data

    if parameters.reference_model is not None and parameters.reference_data is not None:
        keys.append("reference")
        all_models["reference"] = load_pickle.with_options(**OPTIONS)(
            context.working_location, parameters.reference_model
        )
        all_data["reference"] = load_dataframe.with_options(**OPTIONS)(
            context.working_location, parameters.reference_data
        )

    correlations: list[dict[str, Union[str, int, float]]] = []

    for source_key in keys:
        for target_key in keys:
            if source_key == target_key:
                continue

            # Select data sets.
            data_source = all_data[source_key]
            data_target = all_data[target_key]

            # Select models.
            model_source = all_models[source_key]
            model_target = all_models[target_key]

            # Get column order for model.
            columns_source = data_source.filter(like="shcoeffs").columns
            columns_target = data_target.filter(like="shcoeffs").columns

            # Transform the data.
            transform_source = model_source.transform(
                np.append(
                    data_source[columns_source].values,
                    data_target[columns_source].values,
                    axis=0,
                )
            )
            transform_target = model_target.transform(
                np.append(
                    data_source[columns_target].values,
                    data_target[columns_target].values,
                    axis=0,
                )
            )

            # Calculate correlations.
            correlations = correlations + [
                {
                    "source_key": source_key,
                    "target_key": target_key,
                    "source_mode": f"PC{si + 1}",
                    "target_mode": f"PC{ti + 1}",
                    "correlation": pearsonr(
                        transform_source[:, si], transform_target[:, ti]
                    ).statistic,
                }
                for si in range(parameters.components)
                for ti in range(parameters.components)
            ]

    save_dataframe(
        context.working_location,
        make_key(group_key, f"{series.name}.mode_correlations.csv"),
        pd.DataFrame(correlations),
        index=False,
    )


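# Minimal sketch of the pairwise mode correlation above: both models transform
# the same pooled coefficients, then corresponding components are correlated.
# A plain `PCA` stands in for the pickled models; data are toy values.
#
#     import numpy as np
#     from scipy.stats import pearsonr
#     from sklearn.decomposition import PCA
#
#     pooled = np.random.default_rng(1).normal(size=(50, 6))
#     model_a = PCA(n_components=2).fit(pooled[:25])
#     model_b = PCA(n_components=2).fit(pooled[25:])
#     corr = pearsonr(
#         model_a.transform(pooled)[:, 0], model_b.transform(pooled)[:, 0]
#     ).statistic
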

@flow(name="group-cell-shapes_group-population-counts")
def run_flow_group_population_counts(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigPopulationCounts
) -> None:
    """Group cell shapes subflow for population counts."""

    analysis_key = make_key(series.name, "analysis", "analysis.CELL_SHAPES_DATA")
    group_key = make_key(series.name, "groups", "groups.CELL_SHAPES")

    keys = [condition["key"] for condition in series.conditions]
    superkeys = {key_group for key in keys for key_group in key.split("_")}

    counts: list[dict] = []

    for key in superkeys:
        dataframe_key = make_key(analysis_key, f"{series.name}_{key}.CELL_SHAPES_DATA.csv")
        data = load_dataframe.with_options(**OPTIONS)(
            context.working_location, dataframe_key, usecols=["KEY", "SEED", "time"]
        )
        data = data[data["SEED"].isin(parameters.seeds) & (data["time"] == parameters.time)]

        counts.extend(
            [
                {
                    "key": record["KEY"],
                    "seed": record["SEED"],
                    "count": record[0],
                }
                for record in data.groupby(["KEY", "SEED"]).size().reset_index().to_dict("records")
            ]
        )

    save_dataframe(
        context.working_location,
        make_key(group_key, f"{series.name}.population_counts.{parameters.time:03d}.csv"),
        pd.DataFrame(counts).drop_duplicates(),
        index=False,
    )


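# The count per (key, seed) comes from a pandas group size; the unnamed size
# column is keyed `0` after `reset_index()`, hence `record[0]` above:
#
#     import pandas as pd
#
#     data = pd.DataFrame({"KEY": ["A", "A", "B"], "SEED": [0, 0, 1]})
#     records = data.groupby(["KEY", "SEED"]).size().reset_index().to_dict("records")
#     # records == [{"KEY": "A", "SEED": 0, 0: 2}, {"KEY": "B", "SEED": 1, 0: 1}]
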

@flow(name="group-cell-shapes_group-population-stats")
def run_flow_group_population_stats(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigPopulationStats
) -> None:
    """Group cell shapes subflow for population stats."""

    analysis_key = make_key(series.name, "analysis", "analysis.STATISTICS")
    group_key = make_key(series.name, "groups", "groups.SHAPES")
    region_key = "_".join(sorted(parameters.regions))
    keys = [condition["key"] for condition in series.conditions]

    stats: dict[str, dict] = {key: {} for key in keys}

    for key in keys:
        dataframe_key = make_key(analysis_key, f"{series.name}_{key}_{region_key}.STATISTICS.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)

        for feature, group in data.groupby("FEATURE"):
            feature_name = f"{feature}.DEFAULT" if feature in ["VOLUME", "HEIGHT"] else feature

            stats[key][feature_name.upper()] = {
                "size": int(group["SIZE"].sum()),
                "replicates": len(group),
                "mean": group["KS_STATISTIC"].mean(),
                "std": group["KS_STATISTIC"].std(ddof=1),
            }

    save_json(
        context.working_location,
        make_key(group_key, f"{series.name}.population_stats.json"),
        stats,
    )


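# The KS_STATISTIC column is produced upstream (analysis.STATISTICS); a
# two-sample Kolmogorov-Smirnov statistic of that kind can be computed as:
#
#     from scipy.stats import ks_2samp
#
#     statistic = ks_2samp([1.0, 2.0, 3.0], [1.5, 2.5, 3.5]).statistic
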

@flow(name="group-cell-shapes_group-shape-average")
def run_flow_group_shape_average(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigShapeAverage
) -> None:
    """
    Group cell shapes subflow for shape average.

    Find the cell closest to the average shape. Extract original mesh slice
    and extent projections. Create the reconstructed mesh and extract mesh
    slice and extent projections.
    """

    logger = get_run_logger()

    analysis_shapes_key = make_key(series.name, "analysis", "analysis.SHAPES")
    analysis_pca_key = make_key(series.name, "analysis", "analysis.PCA")
    data_key = make_key(series.name, "data", "data.LOCATIONS")
    group_key = make_key(series.name, "groups", "groups.SHAPES")
    region_key = "_".join(sorted(parameters.regions))
    keys = [condition["key"] for condition in series.conditions]

    for key in keys:
        series_key = f"{series.name}_{key}_{region_key}"

        # Load model.
        model_key = make_key(analysis_pca_key, f"{series_key}.PCA.pkl")
        model = load_pickle.with_options(**OPTIONS)(context.working_location, model_key)

        # Load dataframe.
        dataframe_key = make_key(analysis_shapes_key, f"{series_key}.SHAPES.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)

        # Transform data into shape mode space.
        columns = data.filter(like="shcoeffs").columns
        transform = model.transform(data[columns].values)

        # Select the cell closest to average.
        distance, index = KDTree(transform).query([0] * parameters.components)
        selected = data.iloc[index, :]
        logger.info(
            "[ %s ] seed [ %d ] tick [ %d ] cell [ %d ] with distance [ %.2f ]",
            key,
            selected["SEED"],
            selected["TICK"],
            selected["ID"],
            distance,
        )

        # Get the matching location for the selected cell.
        series_key = f"{series.name}_{key}_{selected['SEED']:04d}"
        tar_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
        tar = load_tar(context.working_location, tar_key)

        # Load matching location voxels.
        locations = extract_tick_json(tar, series_key, selected["TICK"], "LOCATIONS")
        location = next(location for location in locations if location["id"] == selected["ID"])
        voxels = get_location_voxels(location)
        array = make_voxels_array(voxels)

        # Create original mesh and get projections.
        original_mesh = construct_mesh_from_array(array, array)
        original_mesh_projections = extract_mesh_projections(original_mesh)

        # Create reconstructed mesh and get projections.
        reconstructed_mesh = construct_mesh_from_coeffs(
            selected, parameters.order, scale=parameters.scale
        )
        reconstructed_mesh_projections = extract_mesh_projections(reconstructed_mesh)

        # Save json for each projection.
        for projection in parameters.projections:
            shape_average: dict[str, dict] = {
                "original_slice": original_mesh_projections[f"{projection}_slice"],
                "original_extent": original_mesh_projections[f"{projection}_extent"],
                "reconstructed_slice": reconstructed_mesh_projections[f"{projection}_slice"],
            }

            save_json(
                context.working_location,
                make_key(group_key, f"{series.name}.shape_average.{key}.{projection.upper()}.json"),
                shape_average,
            )


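# Minimal sketch of the nearest-to-average query above: the origin of shape
# mode space corresponds to the population mean, so the closest transformed
# cell is found with a KD-tree lookup (toy values shown).
#
#     import numpy as np
#     from scipy.spatial import KDTree
#
#     transform = np.array([[0.5, -0.2], [3.0, 1.0], [-0.1, 0.1]])
#     distance, index = KDTree(transform).query([0, 0])  # index == 2
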

@flow(name="group-cell-shapes_group-shape-contours")
def run_flow_group_shape_contours(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigShapeContours
) -> None:
    """Group cell shapes subflow for shape contours."""

    data_key = make_key(series.name, "data", "data.LOCATIONS")
    group_key = make_key(series.name, "groups", "groups.CELL_SHAPES")
    keys = [condition["key"] for condition in series.conditions]

    projection = parameters.projection
    projection_index = list(reversed(PROJECTIONS)).index(projection)

    for key in keys:
        series_key = f"{series.name}_{key}_{parameters.seed:04d}"
        tar_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
        tar = load_tar(context.working_location, tar_key)

        ds = parameters.ds if parameters.ds is not None else estimate_spatial_resolution(key)
        dt = parameters.dt if parameters.dt is not None else estimate_temporal_resolution(key)

        tick = int(parameters.time / dt)
        length, width, height = parameters.box
        box = (int((length - 2) / ds) + 2, int((width - 2) / ds) + 2, int((height - 2) / ds) + 2)

        locations = extract_tick_json(tar, series_key, tick, "LOCATIONS")

        for region in parameters.regions:
            all_contours = []

            for location in locations:
                voxels = get_location_voxels(location, None if region == "DEFAULT" else region)

                if parameters.slice_index is not None:
                    voxels = [
                        voxel
                        for voxel in voxels
                        if voxel[projection_index] == parameters.slice_index
                    ]

                if len(voxels) == 0:
                    continue

                contours = [
                    (np.array(contour) * ds).astype("int").tolist()
                    for contour in extract_voxel_contours(voxels, projection, box)
                ]

                all_contours.append({"id": location["id"], "contours": contours})

            contour_key = f"{key}.{parameters.seed:04d}.{parameters.time:03d}.{region}"
            save_json(
                context.working_location,
                make_key(
                    group_key,
                    f"{series.name}.shape_contours.{contour_key}.{projection.upper()}.json",
                ),
                all_contours,
            )


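# Minimal sketch of the scaling above, assuming `ds` converts voxel counts to
# physical units and `dt` converts ticks to hours (values are illustrative):
#
#     ds, dt, time = 2.0, 0.5, 10
#     tick = int(time / dt)                    # 20 ticks
#     length = 30
#     box_length = int((length - 2) / ds) + 2  # 16 voxels, margin preserved
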

@flow(name="group-cell-shapes_group-shape-errors")
def run_flow_group_shape_errors(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigShapeErrors
) -> None:
    """Group cell shapes subflow for shape errors."""

    analysis_key = make_key(series.name, "analysis", "analysis.SHAPES")
    group_key = make_key(series.name, "groups", "groups.SHAPES")
    region_key = "_".join(sorted(parameters.regions))
    keys = [condition["key"] for condition in series.conditions]

    errors: dict[str, dict] = {key: {} for key in keys}

    for key in keys:
        dataframe_key = make_key(analysis_key, f"{series.name}_{key}_{region_key}.SHAPES.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)

        for region in parameters.regions:
            errors[key][region] = {
                "mean": data[f"mse.{region}".replace(".DEFAULT", "")].mean(),
                "std": data[f"mse.{region}".replace(".DEFAULT", "")].std(ddof=1),
            }

    save_json(
        context.working_location,
        make_key(group_key, f"{series.name}.shape_errors.json"),
        errors,
    )



@flow(name="group-cell-shapes_group-shape-modes")
def run_flow_group_shape_modes(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigShapeModes
) -> None:
    """
    Group cell shapes subflow for shape modes.

    Extract shape modes from PCAs as dictionaries of svg paths for each map
    point and projection. Consolidate shape modes from keys into single json.
    """

    analysis_data_key = make_key(series.name, "analysis", "analysis.CELL_SHAPES_DATA")
    analysis_model_key = make_key(series.name, "analysis", "analysis.CELL_SHAPES_MODELS")
    group_key = make_key(series.name, "groups", "groups.CELL_SHAPES")

    keys = [condition["key"] for condition in series.conditions]
    superkeys = {key_group for key in keys for key_group in key.split("_")}

    for superkey in superkeys:
        series_key = f"{series.name}_{superkey}"

        # Load model.
        model_key = make_key(analysis_model_key, f"{series_key}.CELL_SHAPES_MODELS.pkl")
        model = load_pickle.with_options(**OPTIONS)(context.working_location, model_key)

        # Load dataframe.
        dataframe_key = make_key(analysis_data_key, f"{series_key}.CELL_SHAPES_DATA.csv")
        data = load_dataframe.with_options(**OPTIONS)(context.working_location, dataframe_key)

        # Extract shape modes.
        shape_modes = extract_shape_modes(
            model,
            data,
            parameters.components,
            parameters.regions,
            parameters.order,
            parameters.delta,
        )

        for region in parameters.regions:
            shape_mode_projections: dict[str, list] = {
                f"PC{component + 1}.{projection}": []
                for component in range(parameters.components)
                for projection in parameters.projections
            }

            for shape_mode in shape_modes[region]:
                for projection in parameters.projections:
                    shape_mode_projections[f"PC{shape_mode['mode']}.{projection}"].append(
                        {
                            "point": shape_mode["point"],
                            "projection": shape_mode["projections"][f"{projection}_slice"],
                        }
                    )

            for proj_key, projections in shape_mode_projections.items():
                save_json(
                    context.working_location,
                    make_key(
                        group_key,
                        f"{series.name}.shape_modes.{superkey}.{region}.{proj_key.upper()}.json",
                    ).replace("..", "."),
                    projections,
                )



@flow(name="group-cell-shapes_group-shape-samples")
def run_flow_group_shape_samples(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigShapeSamples
) -> None:
    """
    Group cell shapes subflow for shape samples.

    Extract sample cell shapes from specified simulations. Construct wireframes
    from the cell shape mesh.
    """

    data_key = make_key(series.name, "data", "data.LOCATIONS")
    group_key = make_key(series.name, "groups", "groups.SHAPES")
    keys = [condition["key"] for condition in series.conditions]

    shape_samples: dict[str, dict] = {}

    for key in keys:
        shape_samples[key] = {region: [] for region in parameters.regions}

        # Load location data.
        series_key = f"{series.name}_{key}_{parameters.seed:04d}"
        tar_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
        tar = load_tar(context.working_location, tar_key)
        locations = extract_tick_json(tar, series_key, parameters.tick, "LOCATIONS")

        for index in parameters.indices:
            location = locations[index]

            for region in parameters.regions:
                voxels = get_location_voxels(location)
                array = make_voxels_array(voxels)

                if region != "DEFAULT":
                    region_voxels = get_location_voxels(locations[index], region)
                    region_array = make_voxels_array(region_voxels, reference=voxels)
                    mesh = construct_mesh_from_array(region_array, array)
                else:
                    mesh = construct_mesh_from_array(array, array)

                shape_samples[key][region].append(extract_mesh_wireframe(mesh))

    save_json(
        context.working_location,
        make_key(group_key, f"{series.name}.shape_samples.json"),
        shape_samples,
    )



@flow(name="group-cell-shapes_group-variance-explained")
def run_flow_group_variance_explained(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigVarianceExplained
) -> None:
    """Group cell shapes subflow for variance explained."""

    analysis_key = make_key(series.name, "analysis", "analysis.CELL_SHAPES_MODELS")
    group_key = make_key(series.name, "groups", "groups.CELL_SHAPES")

    keys = [condition["key"] for condition in series.conditions]
    superkeys = {key_group for key in keys for key_group in key.split("_")}

    variance = []

    for superkey in superkeys:
        model_key = make_key(analysis_key, f"{series.name}_{superkey}.CELL_SHAPES_MODELS.pkl")
        model = load_pickle.with_options(**OPTIONS)(context.working_location, model_key)

        variance.append(
            pd.DataFrame(
                {
                    "key": [superkey] * parameters.components,
                    "mode": [f"PC{i}" for i in range(1, parameters.components + 1)],
                    "variance": model.explained_variance_ratio_,
                }
            )
        )

    save_dataframe(
        context.working_location,
        make_key(group_key, f"{series.name}.variance_explained.csv"),
        pd.concat(variance),
        index=False,
    )
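

# Minimal sketch of the quantity tabulated above: scikit-learn exposes the
# fraction of variance captured by each fitted component (toy data shown).
#
#     import numpy as np
#     from sklearn.decomposition import PCA
#
#     model = PCA(n_components=2).fit(np.random.default_rng(2).normal(size=(20, 5)))
#     ratios = model.explained_variance_ratio_  # one fraction per mode, sums to <= 1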