Coverage for src/cell_abm_pipeline/flows/parse_physicell_simulations.py: 0%
36 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1"""
Workflow for parsing PhysiCell simulations into tidy data.

Working location structure:

.. code-block:: bash

    (name)
    ├── data
    │   └── (name)_(key)_(seed).tar.xz
    └── results
        └── (name)_(key)_(seed).csv

Data from **data** are parsed into **results**.
15"""
17from dataclasses import dataclass, field
19from container_collection.manifest import filter_manifest_files
20from io_collection.keys import make_key
21from io_collection.load import load_dataframe, load_tar
22from io_collection.save import save_dataframe
23from prefect import flow
25from cell_abm_pipeline.tasks.physicell import parse_mcds_file
@dataclass
class ParametersConfig:
    """
    Parameter configuration for parse physicell simulations flow.

    Both filter lists are created fresh for every instance, so mutating the
    filters of one configuration never affects another.
    """

    include_filters: list[str] = field(default_factory=lambda: ["*"])
    """List of Unix filename patterns for files to include in parsing."""

    exclude_filters: list[str] = field(default_factory=list)
    """List of Unix filename patterns for files to exclude from parsing."""
@dataclass
class ContextConfig:
    """
    Context configuration for parse physicell simulations flow.

    Holds the two locations the flow reads from and writes to; both fields
    are required and have no defaults.
    """

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

    manifest_location: str
    """Location of manifest file (local path or S3 bucket)."""
@dataclass
class SeriesConfig:
    """
    Series configuration for parse physicell simulations flow.

    Identifies the simulation series to parse and the manifest describing its
    files; all fields are required and have no defaults.
    """

    name: str
    """Name of the simulation series."""

    manifest_key: str
    """Key for manifest file."""

    extensions: list[str]
    """List of file extensions in complete run."""
@flow(name="parse-physicell-simulations")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main parse physicell simulations flow.

    Loads the manifest from the manifest location, selects files using the
    series extensions and the include / exclude filters, and then, for each
    selected entry, loads its ``tar.xz`` archive, parses it with
    ``parse_mcds_file``, and saves the parsed results as a CSV (without the
    index) to the working location.

    Parameters
    ----------
    context
        Locations of the manifest and of the working directory / bucket.
    series
        Series name, manifest key, and file extensions of a complete run.
    parameters
        Include and exclude filename filters.
    """

    manifest = load_dataframe(context.manifest_location, series.manifest_key)
    selected = filter_manifest_files(
        manifest,
        series.extensions,
        parameters.include_filters,
        parameters.exclude_filters,
    )

    for key, files in selected.items():
        # The "tar.xz" entry supplies the keyword arguments for load_tar.
        archive_kwargs = files["tar.xz"]
        archive = load_tar(**archive_kwargs)
        parsed = parse_mcds_file(archive)

        # NOTE(review): "{{timestamp}}" is passed through as a literal key
        # segment; presumably it is a placeholder resolved downstream (confirm
        # against io_collection). The module docstring's layout does not show
        # this segment.
        output_key = make_key(series.name, "{{timestamp}}", "results", f"{key}.csv")
        save_dataframe(context.working_location, output_key, parsed, index=False)